diff --git a/input/InitialPopulations/compile/00_master.do b/input/InitialPopulations/compile/00_master.do index b7509704d..434ef7fcd 100644 --- a/input/InitialPopulations/compile/00_master.do +++ b/input/InitialPopulations/compile/00_master.do @@ -8,7 +8,7 @@ * DATA: UKHLS EUL version - UKDA-6614-stata [to wave n] * WAS EUL version - UKDA-7215-stata [to wave 7] * AUTHORS: Daria Popova, Justin van de Ven -* LAST UPDATE: 18 July 2025 DP +* LAST UPDATE: 4 Nov 2025 DP *************************************************************************************** *************************************************************************************** @@ -36,32 +36,35 @@ set matsize 1000 *************************************************************************************/ * Working directory -*global dir_work "C:\MyFiles\99 DEV ENV\JAS-MINE\data work\initial_populations" -*global dir_work "C:\Users\Patryk\Documents\SP_prep_pop" -global dir_work "D:\Dasha\ESSEX\ESPON 2024\UK\initial_populations" +global dir_work "C:\Dasha\ESSEX\_SimPaths\_SimPaths_UK\initial_populations" * Directory which contains do files global dir_do "${dir_work}/do" -*global dir_do "C:\Users\Patryk\git\SimPathsFork\input\InitialPopulations\compile" -* Directory which contains data files -global dir_data "${dir_work}/data" +* Directory which contains processed data +global dir_data "${dir_work}/data" //data * Directory which contains log files global dir_log "${dir_work}/log" * Directory which contains UKHLS data -*global dir_ukhls_data "J:\01 DATA\UK\ukhls\wave14\stata\stata13_se\ukhls" -global dir_ukhls_data "D:\Dasha\UK-original-data\USoc\UKDA-6614-stata\stata\stata13_se\ukhls" -*global dir_ukhls_data "C:\Users\Patryk\Documents\SP_prep_pop\ukhls\UKDA-6614-stata\stata\stata13_se\ukhls" +global dir_ukhls_data "D:\UK-original-data\USoc\UKDA-6614-stata\stata\stata13_se\ukhls" //original_data + +* Directory which contains BHPS data +global dir_bhps_data 
"D:\UK-original-data\USoc\UKDA-6614-stata\stata\stata13_se\bhps" //original_data_bhps * Directory which contains WAS data -*global dir_was_data "J:\01 DATA\UK\was\wave7\stata\stata13_se" -global dir_was_data "D:\Dasha\UK-original-data\WAS\UKDA-7215-stata\stata\stata13_se" -*global dir_was_data "C:\Users\Patryk\Documents\WAS\UKDA-7215-stata\stata\stata13_se" +global dir_was_data "D:\UK-original-data\WAS\UKDA-7215-stata\stata\stata13_se" + +//additional paths to employment history files +* Directory which contains processed employment history data +global dir_data_emphist "${dir_data}/emphist" //data_emphist + +* Directory which contains employment history do-files +global dir_do_emphist "${dir_do}/do_emphist" -* Directory which contains original initial popultions -global dir_ipop_orig "${dir_work}/original_initial_populations" +* Directory which contains employment history log files +global dir_log_emphist "${dir_log}/emphist" //log_emphist /************************************************************************************** @@ -88,7 +91,15 @@ wave 12 l 2020-2022 wave 13 m 2021-2023 wave 14 n 2022-2024 */ -global UKHLSwaves "a b c d e f g h i j k l m n" /*all waves*/ + +global UKHLSwaves "a b c d e f g h i j k l m n" /*all waves*/ //ukhls_all_waves +global UKHLSwaves_numbers "1 2 3 4 5 6 7 8 9 10 11 12 13 14" //ukhls_all_waves_numbers + +global UKHLS_panel_waves "b c d e f g h i j k l m n" +global UKHLS_panel_waves_numbers "2 3 4 5 6 7 8 9 10 11 12 13 14" //ukhls_waves_numbers +global UKHLS_waves_prefixed "b_ c_ d_ e_ f_ g_ h_ i_ j_ k_ l_ m_ n_" +global BHPS_waves "l m n o p q r" + * waves reporting social care module in ukhls - ADL questions added from wave 7 and then every other wave (from 2016) global scRecWaves "g i k m" * waves reporting social care provided in ukhls (from 2015) @@ -118,8 +129,10 @@ do "${dir_do}/07_was_wealth_data.do" forvalues year = $wealthStartYear / $wealthEndYear { global yearWealth = `year' do "${dir_do}/08_wealth_to_ukhls.do" -} +} 
+*check data and slice into initial populations do "${dir_do}/09_finalise_input_data.do" +*descriptives for initial populations and full sample do "${dir_do}/10_check_yearly_data.do" diff --git a/input/InitialPopulations/compile/01_prepare_UKHLS_pooled_data.do b/input/InitialPopulations/compile/01_prepare_UKHLS_pooled_data.do index e8f0b2f47..bff821225 100644 --- a/input/InitialPopulations/compile/01_prepare_UKHLS_pooled_data.do +++ b/input/InitialPopulations/compile/01_prepare_UKHLS_pooled_data.do @@ -30,12 +30,12 @@ foreach w of global UKHLSwaves { local waveno=strpos("abcdefghijklmnopqrstuvwxyz","`w'") if (`waveno'<13) { - use pidp `w'_ivfho `w'_ivfio `w'_hhorig `w'_buno_dv `w'_dvage `w'_sex `w'_depchl `w'_hidp `w'_pno `w'_pns1pid `w'_pns2pid `w'_month `w'_intdaty_dv /// + use pidp `w'_ivfho `w'_ivfio `w'_hhorig `w'_memorig `w'_buno_dv `w'_dvage `w'_sex `w'_depchl `w'_hidp `w'_pno `w'_pns1pid `w'_pns2pid `w'_month `w'_intdaty_dv /// `w'_mnspid `w'_fnspid `w'_ppid `w'_ppno `w'_sppid `w'_sex_dv `w'_mastat_dv `w'_gor_dv `w'_age_dv /* `w'_hgbioad1 `w'_hgbioad2 */ /// `w'_intdatd_dv `w'_intdatm_dv `w'_intdaty_dv `w'_ethn_dv using `w'_indall.dta, clear } else { - use pidp `w'_ivfho `w'_ivfio `w'_hhorig `w'_buno_dv `w'_dvage `w'_sex `w'_depchl `w'_hidp `w'_pno `w'_pns1pid `w'_pns2pid `w'_month `w'_intdaty_dv /// + use pidp `w'_ivfho `w'_ivfio `w'_hhorig `w'_memorig `w'_buno_dv `w'_dvage `w'_sex `w'_depchl `w'_hidp `w'_pno `w'_pns1pid `w'_pns2pid `w'_month `w'_intdaty_dv /// `w'_mnspid `w'_fnspid `w'_ppid `w'_ppno `w'_sppid `w'_sex_dv `w'_mastat_dv `w'_gor_dv `w'_age_dv `w'_hgbioad1 `w'_hgbioad2 /// `w'_intdatd_dv `w'_intdatm_dv `w'_intdaty_dv `w'_ethn_dv using `w'_indall.dta, clear } diff --git a/input/InitialPopulations/compile/02_create_UKHLS_variables.do b/input/InitialPopulations/compile/02_create_UKHLS_variables.do index 87a4ae85a..42aac7c80 100644 --- a/input/InitialPopulations/compile/02_create_UKHLS_variables.do +++ 
b/input/InitialPopulations/compile/02_create_UKHLS_variables.do @@ -6,7 +6,7 @@ * COUNTRY: UK * DATA: UKHLS EUL version - UKDA-6614-stata [to wave n] * AUTHORS: Daria Popova, Justin van de Ven -* LAST UPDATE: 18 July 2025 DP +* LAST UPDATE: 3 Nov 2025 DP * NOTE: Called from 00_master.do - see master file for further details * Use -9 for missing values *************************************************************************************** @@ -713,28 +713,28 @@ la var der "Return to education" //fre der /*****************************Partnership status*******************************/ -recode mastat_dv (2 3 10 = 1 "Partnered") /// - (0 1 = 2 "Single never married") /// Includes children under 16 - (4 5 6 7 8 9 = 3 "Previously partnered") /// - , into (dcpst) -la var dcpst "Partnership status" -recode dcpst (-8 -2 -1 = -9) - -*If idpartner = 0 (because of household splitting), dcpst should be set to 3 depending on mastat_dv value -replace dcpst = 3 if dcpst == 1 & idpartner <= 0 -replace dcpst = 1 if idpartner > 0 & !missing(idpartner) - -//Children coded as "Never Married" (17 and under chosen as can marry from 18 years onwards) -replace dcpst = 2 if dag <= 17 & idpartner<0 -//fre dcpst +gen dcpst = . +replace dcpst = 1 if idpartner > 0 & !missing(idpartner) //partnered +replace dcpst = 2 if idpartner < 0 | missing(idpartner) +lab var dcpst "Partnership status" +lab def dcpst 1 "partnered" 2 "single" +lab val dcpst dcpst + +recode dcpst (. 
= -9) +/* +* Children coded as "Never Married" +Can only marry from age 18 onwards in the simulation +*/ +replace dcpst = 2 if dag <= 17 & idpartner < 0 +//fre dcpst /*****************************Enter partnership*******************************/ sort idperson swv cap drop dcpen gen dcpen = -9 -replace dcpen=0 if (l.dcpst==2 | l.dcpst==3) -replace dcpen=1 if dcpst==1 & (l.dcpst==2 | l.dcpst==3) +replace dcpen=0 if (l.dcpst==2) +replace dcpen=1 if dcpst==1 & (l.dcpst==2) la val dcpen dummy la var dcpen "Enter partnership" //fre dcpen @@ -745,7 +745,7 @@ sort idperson swv cap drop dcpex gen dcpex=-9 replace dcpex = 0 if l.dcpst==1 -replace dcpex = 1 if dcpst==3 & l.dcpst==1 +replace dcpex = 1 if dcpst==2 & l.dcpst==1 la val dcpex dummy la var dcpex "Exit partnership" //fre dcpex @@ -757,7 +757,7 @@ la var dcpagdf "Partner's age difference" /*********************************Activity status*****************************/ -recode jbstat (1 2 5 12 13 14 = 1 "Employed or self-employed") /// +recode jbstat (1 2 5 12 13 14 15 = 1 "Employed or self-employed") /// (7 = 2 "Student") /// (3 6 8 10 11 97 9 4 = 3 "Not employed") /// /*includes apprenticeships, unpaid family business, govt training scheme+retired */ , into(les_c3) @@ -767,7 +767,7 @@ la var les "Activity status" replace les_c3 = 2 if dag <= 16 //People below age to leave home are not at risk of work so set activity status to not employed if not a student replace les_c3 = 3 if dag < $age_become_responsible & les_c3 != 2 - +//fre les_c3 /***********************Activity status variable adding retirement*************/ *Generate les_c4 variable in addition to the les_c3 variable. Les_c4 adds retired status. 
@@ -777,8 +777,7 @@ replace les_c4 = 4 if jbstat==4 lab var les_c4 "LABOUR MARKET: Activity status" lab define les_c4 1 "Employed or self-employed" 2 "Student" 3 "Not employed" 4 "Retired" lab val les_c4 les_c4 -//tab2 les_c3 les_c4 - +//fre les_c4 /****************************Partner's activity status:***********************/ preserve @@ -870,19 +869,88 @@ bys swv idhh: egen dnc = sum(depChild) *drop depChild la var dnc "Number of dependent children 0 - 18" +/****************************Pension Age***************************************/ +/*cap gen bdt = mdy(1, 15, birthy) /*month of birth is available in special license only*/ +*/ +/*State Retirement Ages for Men in the UK (2009-2023): + +2009-2010: 65 +2010-2011: 65 +2011-2012: 65 +2012-2013: 65 +2013-2014: 65 +2014-2015: 65 +2015-2016: 65 +2016-2017: 65 +2017-2018: 65 +2018-2019: 65 +2019-2020: 65 +2020-2021: 66 +2021-2022: 66 +2022-2023: 66 + +State Retirement Ages for Women in the UK (2009-2023): + +2009-2010: 60 +2010-2011: 60 +2011-2012: 60 +2012-2013: 61 +2013-2014: 61 +2014-2015: 62 +2015-2016: 62 +2016-2017: 63 +2017-2018: 63 +2018-2019: 64 +2019-2020: 65 +2020-2021: 65 +2021-2022: 66 +2022-2023: 66 +*/ +gen dagpns = 0 +//for men +replace dagpns = 1 if dgn==1 & dag>=65 & stm>=2009 & stm<2020 +replace dagpns = 1 if dgn==1 & dag>=66 & stm>=2020 +//for women +replace dagpns = 1 if dgn==0 & dag>=60 & stm>=2009 & stm<2012 +replace dagpns = 1 if dgn==0 & dag>=61 & stm>=2012 & stm<2014 +replace dagpns = 1 if dgn==0 & dag>=62 & stm>=2014 & stm<2016 +replace dagpns = 1 if dgn==0 & dag>=63 & stm>=2016 & stm<2018 +replace dagpns = 1 if dgn==0 & dag>=64 & stm>=2018 & stm<2019 +replace dagpns = 1 if dgn==0 & dag>=65 & stm>=2019 & stm<2021 +replace dagpns = 1 if dgn==0 & dag>=66 & stm>=2021 +//fre dagpns + +/****************************Pension age of a spouse***************************/ +preserve +keep swv idperson idhh dagpns +rename dagpns dagpns_sp +rename idperson idpartner +save "$dir_data/temp_dagpns", replace 
+restore +merge m:1 swv idpartner idhh using "$dir_data/temp_dagpns" +keep if _merge == 1 | _merge == 3 +la var dagpns_sp "Pension age - partner" +drop _merge +replace dagpns_sp=-9 if idpartner<0 + /*******************************Flag for adult children***********************/ +//add parental ages & retirement status preserve keep if dgn == 0 -keep swv idhh idperson dag +keep swv idhh idperson dag dagpns les_c4 rename idperson idmother rename dag dagmother +rename dagpns dagpnsmother +rename les_c4 les_c4mother save "$dir_data/temp_mother_dag", replace restore, preserve keep if dgn == 1 -keep swv idhh idperson dag +keep swv idhh idperson dag dagpns les_c4 rename idperson idfather rename dag dagfather +rename dagpns dagpnsfather +rename les_c4 les_c4father save "$dir_data/temp_father_dag", replace restore @@ -893,20 +961,37 @@ merge m:1 swv idhh idfather using "$dir_data/temp_father_dag" keep if _merge == 1 | _merge == 3 drop _merge -//Adult child is identified on the successful merge with mother / father in the same household and age -gen adultchildflag = (!missing(dagmother) | !missing(dagfather)) & dag >= $age_become_responsible & idpartner <= 0 -*Introduce a condition that (adult) children cannot be older than parents-15 year of age -replace adultchildflag = 0 if dag >= dagfather-15 | dag >= dagmother-15 +/*Individual is considered as adult child if +- they have at least one parent in the household (i.e. 
non-missing parental age) +- aged 18+ +- do not have a partner living in the same household +- is at least 15 years younger than either of their parents +- neither of their parents is of the state retirement age in that particular year & neither is retired +*/ +gen adultchildflag = (!missing(dagmother) | !missing(dagfather)) & dag >= $age_become_responsible & idpartner <= 0 +replace adultchildflag = 0 if dag >= dagfather-15 & dag >= dagmother-15 //was previously or ==> replaced with & +//fre adultchildflag +replace adultchildflag = 0 if (dagpnsmother==1 | les_c4mother==4) & (dagpnsfather ==1 | les_c4father==4) +tab2 adultchildflag swv , row + +/*Account for cases missing information +replace adultchildflag = -9 if idmother>0 & /// + (dagmother==. | dagmother<0 | les_c4mother==. | les_c4mother<0) & dag >= 17 +replace adultchildflag = -9 if idfather>0 & /// + (dagfather==. | dagfather<0 | les_c4father==. | les_c4father<0) & dag >= 17 +fre adultchildflag +2.7% have missing info on one of their parents, not sure if it is worth dropping them? 
+*/ /************************Household composition*********************************/ cap gen dhhtp_c4 = -9 replace dhhtp_c4 = 1 if dcpst == 1 & dnc == 0 //Couple, no children replace dhhtp_c4 = 2 if dcpst == 1 & dnc > 0 & !missing(dnc) //Couple, children -replace dhhtp_c4 = 3 if (dcpst == 2 | dcpst == 3) & (dnc == 0 | dag <= $age_become_responsible | adultchildflag== 1) +replace dhhtp_c4 = 3 if (dcpst == 2) & (dnc == 0 | dag <= $age_become_responsible | adultchildflag== 1) /*Single, no children (Note: adult children and children below age to become responsible should be assigned "no children" category, even if there are some children in the household)*/ -replace dhhtp_c4 = 4 if (dcpst == 2 | dcpst == 3) & dnc > 0 & !missing(dnc) & dhhtp_c4 != 3 //Single, children +replace dhhtp_c4 = 4 if (dcpst == 2) & dnc > 0 & !missing(dnc) & dhhtp_c4 != 3 //Single, children la def dhhtp_c4_lb 1"Couple with no children" 2"Couple with children" 3"Single with no children" 4"Single with children" la values dhhtp_c4 dhhtp_c4_lb @@ -998,75 +1083,11 @@ la var drtren "DEMOGRAPHIC: Enter retirement" //fre drtren -/****************************Pension Age***************************************/ -/*cap gen bdt = mdy(1, 15, birthy) /*month of birth is available in special license only*/ -*/ -/*State Retirement Ages for Men in the UK (2009-2023): - -2009-2010: 65 -2010-2011: 65 -2011-2012: 65 -2012-2013: 65 -2013-2014: 65 -2014-2015: 65 -2015-2016: 65 -2016-2017: 65 -2017-2018: 65 -2018-2019: 65 -2019-2020: 65 -2020-2021: 66 -2021-2022: 66 -2022-2023: 66 - -State Retirement Ages for Women in the UK (2009-2023): - -2009-2010: 60 -2010-2011: 60 -2011-2012: 60 -2012-2013: 61 -2013-2014: 61 -2014-2015: 62 -2015-2016: 62 -2016-2017: 63 -2017-2018: 63 -2018-2019: 64 -2019-2020: 65 -2020-2021: 65 -2021-2022: 66 -2022-2023: 66 -*/ -gen dagpns = 0 -//for men -replace dagpns = 1 if dgn==1 & dag>=65 & stm>=2009 & stm<2020 -replace dagpns = 1 if dgn==1 & dag>=66 & stm>=2020 -//for women -replace dagpns = 
1 if dgn==0 & dag>=60 & stm>=2009 & stm<2012 -replace dagpns = 1 if dgn==0 & dag>=61 & stm>=2012 & stm<2014 -replace dagpns = 1 if dgn==0 & dag>=62 & stm>=2014 & stm<2016 -replace dagpns = 1 if dgn==0 & dag>=63 & stm>=2016 & stm<2018 -replace dagpns = 1 if dgn==0 & dag>=64 & stm>=2018 & stm<2019 -replace dagpns = 1 if dgn==0 & dag>=65 & stm>=2019 & stm<2021 -replace dagpns = 1 if dgn==0 & dag>=66 & stm>=2021 - - -/****************************Pension age of a spouse***************************/ -preserve -keep swv idperson idhh dagpns -rename dagpns dagpns_sp -rename idperson idpartner -save "$dir_data/temp_dagpns", replace -restore -merge m:1 swv idpartner idhh using "$dir_data/temp_dagpns" -keep if _merge == 1 | _merge == 3 -la var dagpns_sp "Pension age - partner" -drop _merge -replace dagpns_sp=-9 if idpartner<0 - /************************************JBSTAT: Not Retired***********************/ -gen lesnr_c2 = . -replace lesnr_c2 = 1 if (jbstat ==1 | jbstat==2) /*employed*/ -replace lesnr_c2 = 2 if jbstat==3 | jbstat==5 | jbstat==6 | jbstat==8 | jbstat==9 | jbstat==10 | jbstat==11 | jbstat==14 | jbstat==97 +gen lesnr_c2 = -9 +replace lesnr_c2 = 1 if les_c3==1 +replace lesnr_c2 = 2 if les_c3==2 | les_c3==3 lab var lesnr_c2 "Not retired work status" lab define lesnr_c2 1 "in work" 2 "not in work" lab val lesnr_c2 lesnr_c2 @@ -1074,22 +1095,22 @@ lab val lesnr_c2 lesnr_c2 /************************Exited parental home*********************************/ /*Generated from fnspid and/or mnspid. 1 means that individual no longer lives with a parent (fnspid & mnspid is equal to missing) - when in the previous wave they lived with a parent (fnspid or mnspid not equal to missing).*/ -/* -bysort swv: fre mnspid if mnspid<=0 -bysort swv: fre fnspid if fnspid<=0 -bysort swv: fre mnspid if mnspid>=. -bysort swv: fre fnspid if fnspid>=. + when in the previous wave they lived with a parent (fnspid or mnspid not equal to missing). 
+NOTE: Leaving the parental home was synchronised with the definition of adult child; +an individual can leave the parental home unless they are a "responsible adult" (their both parents retired). */ sort idperson swv -gen dlftphm = -9 if (l.fnspid<0 & l.mnspid<0) //those who did not live with parents in the same hh -replace dlftphm=0 if (l.fnspid>0 | l.mnspid>0) //those who lived with at least one parent -replace dlftphm =1 if (fnspid<0 & mnspid<0) & (l.fnspid>0 | l.mnspid>0) //lived with at least one parent but not anymore -bys idperson: replace dlftphm =-9 if _n==1 //this condition will not be applicable for first year in the panel// -la val dlftphm dummy -la var dlftphm "DEMOGRAPHIC: Exited Parental Home" -//bys swv: fre dlftphm - +gen dlftphm = -9 +replace dlftphm = 0 if adultchildflag[_n-1] == 1 & idperson == idperson[_n-1] & swv == swv[_n-1] + 1 +replace dlftphm = 0 if dag == 18 & adultchildflag == 1 +replace dlftphm = 1 if adultchildflag == 0 & adultchildflag[_n-1] == 1 & idperson == idperson[_n-1] & swv == swv[_n-1] + 1 +lab var dlftphm "DEMOGRAPHIC: Exit the Parental Home" +/* +tab dlftphm swv, col +tab dlftphm stm, col +tab dlftphm dun +tab dlftphm adultchildflag +*/ /*********************************Left education*******************************/ sort idperson swv @@ -1148,6 +1169,15 @@ fre dukfr /************************Number of newborn*************************/ +/*NOTE: The approach below was not entirely correct for identifying newborns. +* It defines newborns based on child age (dag <= 1), not on actual birth events. +* As a result, it counts all children aged under one at interview, not just those +* born since the previous wave. The same baby can be counted twice across waves, +* and adopted or stepchildren under one may also be included. +* At the BHPS–UKHLS transition, this method overcounts legacy BHPS infants +* who were already born before the merge but still under one year old in wave B. 
+ +* reported since the last interview, linked to the reporting parent (usually the mother). cap gen child0 = 0 replace child0=1 if dag<=1 @@ -1173,7 +1203,137 @@ replace mother_dchpd=0 if dgn==1 drop dchpd rename mother_dchpd dchpd lab var dchpd "Women's number of newborn children" +*/ + +save "$dir_data\ukhls_pooled_all_obs_02.dta", replace +************************************************************************ +* Number of newborn from "newborn" datasets +************************************************************************ +/*DP: This code uses the UKHLS newborn module, where each row directly represents a birth event (not inferred from child age). +Each record corresponds to a child newly reported since the last interview. We exclude BHPS “legacy” infants in wave B to prevent overcounting at the merge. +- It is more conceptually exact – counts actual reported births, not inferred ones. +- No double-counting across waves – each newborn appears only once. +- Handles BHPS transition properly – avoids inflating wave B with pre-existing BHPS babies (note that in original Cara's SAS code all BHPS newborns were dropped which I think shoudn't happen, + so Cara's version was underestimating number of newborns. +*/ + +* Combine newborn files (b–n) into one long-format dataset +clear + +local firstwave : word 1 of $UKHLS_panel_waves + +* --- Load the first wave --- +use "${dir_ukhls_data}/`firstwave'_newborn.dta", clear +gen swv = "`firstwave'" + +* Remove wave prefix from variable names +local prefix = "`firstwave'_" +foreach var of varlist `firstwave'_* { + local base = subinstr("`var'", "`prefix'", "", .) + rename `var' `base' +} + +* Save as base file +save "${dir_data}/temp_uknbrn.dta", replace + +* --- Append remaining waves --- +foreach w of global UKHLS_panel_waves { + if "`w'" != "`firstwave'" { + di as text "Appending wave `w'..." 
+ use "${dir_ukhls_data}/`w'_newborn.dta", clear + gen swv = "`w'" + + * Remove wave prefix + local prefix = "`w'_" + capture unab prefixed : `w'_* + if _rc == 0 { + foreach var of local prefixed { + local base = subinstr("`var'", "`prefix'", "", .) + rename `var' `base' + } + } + + * Append to the long dataset + append using "${dir_data}/temp_uknbrn.dta" + save "${dir_data}/temp_uknbrn.dta", replace + } +} +//convert wave number to numeric +gen swv_num = . +local i = 1 +foreach w of global UKHLS_panel_waves { + local num : word `i' of $UKHLS_panel_waves_numbers + replace swv_num = `num' if swv == "`w'" + local ++i +} +drop swv +rename swv_num swv +save "${dir_data}/temp_uknbrn.dta", replace + +* Count all genuine newborns (UKHLS + BHPS), excludes BHPS legacy infants in wave B +use "${dir_data}/temp_uknbrn.dta", clear + +keep pidp swv memorig lchlv +keep if lchlv == 1 + +* Define newborn indicator +gen byte nbrn = 0 +* UKHLS-origin respondents (memorig = 1, 2, 7, 8): +* Always count their newborns. These are all part of the original or ethnic minority boost samples. +replace nbrn = 1 if inlist(memorig, 1, 2, 7, 8) +* BHPS-origin respondents (memorig = 3, 4, 5, 6): +* The BHPS sample was integrated into UKHLS starting from wave B (2010–2012). +* Infants recorded at that point include "legacy" BHPS babies already born before +* the merge — not genuine new births within the UKHLS observation window. +** To avoid overcounting these legacy infants, we exclude BHPS-origin newborns +* only in their first UKHLS wave (wave B). From wave C onward, BHPS households +* are fully integrated, so new births are genuine new births and should be counted. +replace nbrn = 1 if inlist(memorig, 3, 4, 5, 6) & swv != 2 + +* Collapse to parent-wave level ==> both parents may report the same child of they are in the same hh +bys pidp swv: egen dchpd = total(nbrn) +label var dchpd "Number of newborn children (UKHLS + BHPS, excl. 
BHPS legacy infants in wave B)" +bys pidp swv: keep if _n == 1 //(376 observations deleted) +rename pidp idperson +save "${dir_data}/temp_parent_dchpd.dta", replace + +* Merge into main person-wave dataset +use "$dir_data\ukhls_pooled_all_obs_02.dta", clear +merge 1:1 idperson swv using "${dir_data}/temp_parent_dchpd.dta" +keep if _merge ==1 | _merge==3 +drop _merge + +* After merging: fill missing with 0 +replace dchpd = 0 if missing(dchpd) +label var dchpd "Number of newborn children (UKHLS + BHPS, excl. BHPS legacy infants in wave B)" +/*check how many hh reported same newborn twice because both parents are respondents +preserve +* Keep only cases with at least one newborn +keep if dchpd > 0 +* Keep only core identifiers and gender +keep idperson idhh swv dgn dchpd + +* Count households with both male and female respondents reporting newborns +bysort idhh swv: egen hh_births = total(dchpd>0) +bysort idhh swv: egen men_births = total(dchpd>0 & dgn==1) +bysort idhh swv: egen women_births = total(dchpd>0 & dgn==0) + +* Mark households where both genders reported at least one newborn +gen both_parents = (men_births>0 & women_births>0) + +* Summarise how common these are +tab men_births +tab women_births +tab both_parents +*No such cases, new births are reported by women only +restore +*/ + +* Note that for the estimates we will only keep newborns who are reported by mothers, but here we keep all reported newborns for each respondent +tab2 swv dchpd if dgn==1, m row +tab2 swv dchpd if dgn==0 & sprfm==1, m row +tab2 swv dchpd if dgn==0 & sprfm==0, m row /*****************************In educational age range*************************/ gen sedag = 1 if dvage >= 16 & dvage <= 29 @@ -1620,8 +1780,6 @@ foreach var in idhh idperson idpartner idfather idmother dct drgn1 dwt dnc02 dnc } - - *recode missings in weights to zero. 
foreach var in dimlwt disclwt dimxwt dhhwt { qui recode `var' (.=0) (-9/-1=0) @@ -1657,6 +1815,32 @@ isid idperson idhh swv * save the whole pooled dataset that will be used for regression estimates *******************************************************************************/ save "$dir_data\ukhls_pooled_all_obs_02.dta", replace + + + +/*********************** Run employment history do-files to produce liwwh *******************************/ +* 01_Intdate.do: set up cross-wave file of interview dates +* ==> needed to link previous wave interview date to each respondent*/ +do ${dir_do_emphist}/00_Master_emphist.do + +use "$dir_data\ukhls_pooled_all_obs_02.dta", clear + +merge 1:1 idperson swv using ${dir_data_emphist}/temp_liwwh, keepusing (liwwh) +//This is done analogous to UKMOD input data +drop if _merge==2 +replace liwwh=12 if _merge==1 +replace liwwh=0 if _merge==1 & les_c3 !=1 //assume zero months if not in employment +replace liwwh=-9 if swv==1 + +replace liwwh = liwwh/12 +label var liwwh "Total years in employment since Jan 2007" + +bys swv: fre liwwh if dag<16 +bys swv: fre liwwh if dag>=16 + +drop _merge +save "$dir_data\ukhls_pooled_all_obs_02.dta", replace + cap log close @@ -1666,7 +1850,6 @@ cap log close #delimit ; local files_to_drop father_edu.dta - mother_dchpd.dta mother_edu.dta temp.dta temp_age.dta @@ -1682,7 +1865,8 @@ local files_to_drop temp_ypnb.dta tmp_partnershipDuration.dta temp_dot01.dta - + temp_uknbrn.dta + temp_parent_dchpd.dta ; #delimit cr // cr stands for carriage return diff --git a/input/InitialPopulations/compile/06_reweight_and_slice.do b/input/InitialPopulations/compile/06_reweight_and_slice.do index d94be7bd9..f3e0aa8f6 100644 --- a/input/InitialPopulations/compile/06_reweight_and_slice.do +++ b/input/InitialPopulations/compile/06_reweight_and_slice.do @@ -3,7 +3,7 @@ * WEIGHT ADJUSTMENT TO ACCOUNT FOR USING HOUSEHOLDS WITHOUT MISSING VALUES * * AUTH: Patryk Bronka, Daria Popova, Justin van de Ven -* LAST EDIT: 18 July 
2025 DP +* LAST EDIT: 21 Oct 2025 DP * *********************************************************************/ ******************************************************************************** @@ -60,7 +60,7 @@ recode hh_size (1=1) (2=2) (3=3) (4/max=4) , gen(hhsize_cat2) /*Household-level probit. Model probabiltiy of being a complete household conditional on presence of people of certain education age gender combination, marital status and region.*/ -probit complete_hh _Ideh* dcpstcat* ib8.drgn1 i.stm , vce(robust) iterate(20) //i.hhsize_cat2 DP: dropped as otherwise does not converge +probit complete_hh _Ideh* dcpstcat* ib8.drgn1 i.stm , vce(robust) iterate(20) //i.hhsize_cat2, dropped as otherwise does not converge *Predict probability of being a complete household predict pr_comphh diff --git a/input/InitialPopulations/compile/08_wealth_to_ukhls.do b/input/InitialPopulations/compile/08_wealth_to_ukhls.do index 40be09823..511ebb069 100644 --- a/input/InitialPopulations/compile/08_wealth_to_ukhls.do +++ b/input/InitialPopulations/compile/08_wealth_to_ukhls.do @@ -404,10 +404,10 @@ save "population_initial_fs_UK_$yearWealth", replace /************************************************************************************** * clean-up and exit -*************************************************************************************/ +************************************************************************************* #delimit ; local files_to_drop - ukhls_wealthtemp.dta + ukhls_wealthtemp.dta ukhls_wealthtemp1.dta ukhls_wealthtemp2.dta ukhls_wealthtemp3.dta @@ -417,8 +417,8 @@ local files_to_drop foreach file of local files_to_drop { erase "$dir_data/`file'" } - - + +*/ /************************************************************************************** * fin **************************************************************************************/ diff --git a/input/InitialPopulations/compile/09_finalise_input_data.do 
b/input/InitialPopulations/compile/09_finalise_input_data.do index 8d3d0b202..d261291ad 100644 --- a/input/InitialPopulations/compile/09_finalise_input_data.do +++ b/input/InitialPopulations/compile/09_finalise_input_data.do @@ -6,7 +6,7 @@ * COUNTRY: UK * DATA: UKHLS EUL version - UKDA-6614-stata [to wave n] * AUTHORS: Daria Popova, Justin van de Ven -* LAST UPDATE: 18 July 2025 +* LAST UPDATE: 3 Nov 2025 * NOTE: Called from 00_master.do - see master file for further details *************************************************************************************** @@ -181,23 +181,23 @@ forvalues yy = $firstSimYear/$lastSimYear { *limit saved variables keep idhh idbenefitunit idperson idpartner idmother idfather pno swv dgn dag dcpst dnc02 dnc ded deh_c3 sedex jbstat les_c3 dlltsd dlltsd01 dhe ydses_c5 /// - yplgrs_dv ypnbihs_dv yptciihs_dv dhhtp_c4 ssscp dcpen dcpyy dcpex dcpagdf ynbcpdf_dv der sedag sprfm dagsp dehsp_c3 dhesp lessp_c3 dehm_c3 dehf_c3 /// + yplgrs_dv ypnbihs_dv yptciihs_dv dhhtp_c4 ssscp dcpen dcpyy dcpex dcpagdf ynbcpdf_dv der sedag sprfm dchpd dagsp dehsp_c3 dhesp lessp_c3 dehm_c3 dehf_c3 /// stm lesdf_c4 ppno dhm scghq2_dv dhh_owned lhw l1_lhw drgn1 dct dwt_sampling les_c4 dhm_ghq lessp_c4 adultchildflag multiplier dwt /// potential_earnings_hourly l1_potential_earnings_hourly liquid_wealth tot_pen nvmhome need_socare formal_socare_hrs partner_socare_hrs daughter_socare_hrs son_socare_hrs other_socare_hrs formal_socare_cost carehoursprovidedweekly /// econ_benefits econ_benefits_nonuc econ_benefits_uc /// - ypncp ypnoab aidhrs carewho dhe_mcs dhe_pcs dhe_mcssp dhe_pcssp dls dot dot01 unemp financial_distress + ypncp ypnoab aidhrs carewho dhe_mcs dhe_pcs dhe_mcssp dhe_pcssp dls dot dot01 unemp financial_distress liwwh order idhh idbenefitunit idperson idpartner idmother idfather pno swv dgn dag dcpst dnc02 dnc ded deh_c3 sedex jbstat les_c3 dlltsd dlltsd01 dhe ydses_c5 yplgrs_dv ypnbihs_dv yptciihs_dv dhhtp_c4 ssscp dcpen /// - dcpyy dcpex dcpagdf 
ynbcpdf_dv der sedag sprfm dagsp dehsp_c3 dhesp lessp_c3 dehm_c3 dehf_c3 stm lesdf_c4 ppno dhm scghq2_dv dhh_owned lhw l1_lhw drgn1 dct dwt_sampling les_c4 dhm_ghq lessp_c4 adultchildflag /// + dcpyy dcpex dcpagdf ynbcpdf_dv der sedag sprfm dchpd dagsp dehsp_c3 dhesp lessp_c3 dehm_c3 dehf_c3 stm lesdf_c4 ppno dhm scghq2_dv dhh_owned lhw l1_lhw drgn1 dct dwt_sampling les_c4 dhm_ghq lessp_c4 adultchildflag /// multiplier dwt potential_earnings_hourly l1_potential_earnings_hourly liquid_wealth tot_pen nvmhome need_socare formal_socare_hrs partner_socare_hrs daughter_socare_hrs son_socare_hrs other_socare_hrs formal_socare_cost carehoursprovidedweekly /// econ_benefits econ_benefits_nonuc econ_benefits_uc /// - ypncp ypnoab aidhrs carewho dhe_mcs dhe_pcs dhe_mcssp dhe_pcssp dls dot dot01 unemp financial_distress + ypncp ypnoab aidhrs carewho dhe_mcs dhe_pcs dhe_mcssp dhe_pcssp dls dot dot01 unemp financial_distress liwwh recode idhh idbenefitunit idperson idpartner idmother idfather pno swv dgn dag dcpst dnc02 dnc ded deh_c3 sedex jbstat les_c3 dlltsd dlltsd01 dhe ydses_c5 yplgrs_dv ypnbihs_dv yptciihs_dv dhhtp_c4 ssscp /// - dcpen dcpyy dcpex dcpagdf ynbcpdf_dv der sedag sprfm dagsp dehsp_c3 dhesp lessp_c3 dehm_c3 dehf_c3 stm lesdf_c4 ppno dhm scghq2_dv dhh_owned lhw l1_lhw drgn1 dct dwt_sampling les_c4 dhm_ghq lessp_c4 /// + dcpen dcpyy dcpex dcpagdf ynbcpdf_dv der sedag sprfm dchpd dagsp dehsp_c3 dhesp lessp_c3 dehm_c3 dehf_c3 stm lesdf_c4 ppno dhm scghq2_dv dhh_owned lhw l1_lhw drgn1 dct dwt_sampling les_c4 dhm_ghq lessp_c4 /// adultchildflag multiplier dwt potential_earnings_hourly l1_potential_earnings_hourly liquid_wealth tot_pen nvmhome need_socare formal_socare_hrs partner_socare_hrs daughter_socare_hrs son_socare_hrs other_socare_hrs carehoursprovidedweekly /// econ_benefits econ_benefits_nonuc econ_benefits_uc /// - formal_socare_cost ypncp ypnoab aidhrs carewho dhe_mcs dhe_pcs dhe_mcssp dhe_pcssp dls dot dot01 unemp financial_distress (missing=-9) + 
formal_socare_cost ypncp ypnoab aidhrs carewho dhe_mcs dhe_pcs dhe_mcssp dhe_pcssp dls dot dot01 unemp financial_distress liwwh (missing=-9) gsort idhh idbenefitunit idperson save "$dir_data/population_initial_UK_$year.dta", replace @@ -207,11 +207,16 @@ forvalues yy = $firstSimYear/$lastSimYear { } cap log close -/**************************************************************************************** + +**************************************************************************************** * finalise *************************************************************************************** #delimit ; local files_to_drop + ukhls_wealthtemp.dta + ukhls_wealthtemp1.dta + ukhls_wealthtemp2.dta + ukhls_wealthtemp3.dta was_wealthdata.dta ; #delimit cr // cr stands for carriage return @@ -219,7 +224,7 @@ local files_to_drop foreach file of local files_to_drop { erase "$dir_data/`file'" } -*/ + *************************************************************************************** * end diff --git a/input/InitialPopulations/compile/10_check_yearly_data.do b/input/InitialPopulations/compile/10_check_yearly_data.do index 9020c467c..1566efb6d 100644 --- a/input/InitialPopulations/compile/10_check_yearly_data.do +++ b/input/InitialPopulations/compile/10_check_yearly_data.do @@ -6,7 +6,7 @@ * COUNTRY: UK * DATA: UKHLS EUL version - UKDA-6614-stata [to wave n] * AUTHORS: Daria Popova -* LAST UPDATE: 18 July 2025 DP +* LAST UPDATE: 3 Nov 2025 DP * NOTE: Called from 00_master.do - see master file for further details ***************************************************************************************/* set matsize 11000, permanently @@ -52,7 +52,8 @@ dcpagdf ynbcpdf_dv der sedag -sprfm +sprfm +dchpd dagsp dehsp_c3 dhesp @@ -96,6 +97,7 @@ unemp dls financial_distress carehoursprovidedweekly +liwwh ; #delimit cr // cr stands for carriage return @@ -135,8 +137,7 @@ idfather pno swv dgn -dag -dcpst +dag dnc02 dnc ded @@ -156,7 +157,8 @@ dcpagdf ynbcpdf_dv der sedag -sprfm +sprfm 
+dchpd dagsp stm dhm @@ -169,7 +171,6 @@ multiplier dwt dcpst_1 dcpst_2 -dcpst_3 deh_c3_1 deh_c3_2 deh_c3_3 @@ -257,6 +258,7 @@ unemp dls financial_distress carehoursprovidedweekly +liwwh ; #delimit cr // cr stands for carriage return @@ -302,7 +304,7 @@ qui sum `varlist2' , de outreg2 using "$dir_data/population_initial_UK_sumstats.xls" if stm==`year', sum(log) append cttop(`year') keep (`varlist2') } -/* + ********************************************************************** *output summary stats for new initial populations before dropping hhs* ********************************************************************** @@ -346,40 +348,10 @@ qui sum `varlist2' , de outreg2 using "$dir_data/population_initial_fs_UK_sumstats.xls" if stm==`year', sum(log) append cttop(`year') keep (`varlist2') } -*/ + cap erase "$dir_data/population_initial_UK_sumstats.txt" cap erase "$dir_data/population_initial_fs_UK_sumstats.txt" cap log close -/* -************************************************************* -*clean up new initial populations - keep only required vars * -************************************************************* -forvalues year=2010/2023 { -insheet using "$dir_data/population_initial_UK_`year'.csv", clear - - *limit saved variables - keep idhh idbenefitunit idperson idpartner idmother idfather pno swv dgn dag dcpst dnc02 dnc ded deh_c3 sedex jbstat les_c3 dlltsd dlltsd01 dhe ydses_c5 /// - yplgrs_dv ypnbihs_dv yptciihs_dv dhhtp_c4 ssscp dcpen dcpyy dcpex dcpagdf ynbcpdf_dv der sedag sprfm dagsp dehsp_c3 dhesp lessp_c3 dehm_c3 dehf_c3 /// - stm lesdf_c4 ppno dhm scghq2_dv dhh_owned lhw drgn1 dct dwt_sampling les_c4 dhm_ghq lessp_c4 adultchildflag multiplier dwt /// - potential_earnings_hourly l1_potential_earnings_hourly liquid_wealth need_socare formal_socare_hrs partner_socare_hrs daughter_socare_hrs son_socare_hrs other_socare_hrs formal_socare_cost /// - ypncp ypnoab aidhrs carewho dhe_mcs dhe_pcs dot dot01 unemp dhe_mcssp dhe_pcssp - - order idhh idbenefitunit 
idperson idpartner idmother idfather pno swv dgn dag dcpst dnc02 dnc ded deh_c3 sedex jbstat les_c3 dlltsd dlltsd01 dhe ydses_c5 yplgrs_dv ypnbihs_dv yptciihs_dv dhhtp_c4 ssscp dcpen /// - dcpyy dcpex dcpagdf ynbcpdf_dv der sedag sprfm dagsp dehsp_c3 dhesp lessp_c3 dehm_c3 dehf_c3 stm lesdf_c4 ppno dhm scghq2_dv dhh_owned lhw drgn1 dct dwt_sampling les_c4 dhm_ghq lessp_c4 adultchildflag /// - multiplier dwt potential_earnings_hourly l1_potential_earnings_hourly liquid_wealth need_socare formal_socare_hrs partner_socare_hrs daughter_socare_hrs son_socare_hrs other_socare_hrs formal_socare_cost /// - ypncp ypnoab aidhrs carewho dhe_mcs dhe_pcs dhe_mcssp dhe_pcssp dot dot01 unemp - - recode idhh idbenefitunit idperson idpartner idmother idfather pno swv dgn dag dcpst dnc02 dnc ded deh_c3 sedex jbstat les_c3 dlltsd dlltsd01 dhe ydses_c5 yplgrs_dv ypnbihs_dv yptciihs_dv dhhtp_c4 ssscp /// - dcpen dcpyy dcpex dcpagdf ynbcpdf_dv der sedag sprfm dagsp dehsp_c3 dhesp lessp_c3 dehm_c3 dehf_c3 stm lesdf_c4 ppno dhm scghq2_dv dhh_owned lhw drgn1 dct dwt_sampling les_c4 dhm_ghq lessp_c4 /// - adultchildflag multiplier dwt potential_earnings_hourly l1_potential_earnings_hourly liquid_wealth need_socare formal_socare_hrs partner_socare_hrs daughter_socare_hrs son_socare_hrs other_socare_hrs /// - formal_socare_cost ypncp ypnoab aidhrs carewho dhe_mcs dhe_pcs dhe_mcssp dhe_pcssp dot dot01 unemp (missing=-9) - - gsort idhh idbenefitunit idperson - save "$dir_data/population_initial_UK_`year'.dta", replace - export delimited using "$dir_data/population_initial_UK_`year'.csv", nolabel replace -} -*/ - diff --git a/input/InitialPopulations/compile/RegressionEstimates/master.do b/input/InitialPopulations/compile/RegressionEstimates/master.do index 373b7f3f8..4e4337a13 100644 --- a/input/InitialPopulations/compile/RegressionEstimates/master.do +++ b/input/InitialPopulations/compile/RegressionEstimates/master.do @@ -8,7 +8,7 @@ * DATA: UKHLS EUL version - UKDA-6614-stata [to wave n] * * 
AUTHORS: Daria Popova, Justin van de Ven -* LAST UPDATE: 1 July 2025 DP +* LAST UPDATE: 5 Nov 2025 DP *************************************************************************************** *************************************************************************************** @@ -47,7 +47,7 @@ set matsize 1000 **************************************************************************************/ * Working directory -global dir_work "D:\Dasha\ESSEX\ESPON 2024\UK\regression_estimates" +global dir_work "D:\Dasha\ESSEX\_SimPaths\_SimPaths_UK\regression_estimates" * Directory which contains do files global dir_do "${dir_work}/do" @@ -62,26 +62,26 @@ global dir_raw_results "${dir_work}/raw_results" global dir_results "${dir_work}/results" * Directory which contains pooled dataset for estimates -global dir_ukhls_data "D:\Dasha\ESSEX\ESPON 2024\UK\initial_populations\data" +global dir_ukhls_data "D:\Dasha\ESSEX\_SimPaths\_SimPaths_UK\initial_populations\data" * Directory containing external input data global dir_external_data "$dir_work/external_data" * Directory containing results of comparison of various weights -global weight_checks "D:\Dasha\ESSEX\ESPON 2024\UK\regression_estimates\weight_checks" +global weight_checks "${dir_work}/weight_checks" *********************Internal validation**************************************** * Directory to save data for internal validation -global dir_validation_data "D:\Dasha\ESSEX\ESPON 2024\UK\regression_estimates\internal_validation\data" +global dir_validation_data "${dir_work}/internal_validation/data" * Directory for internal validation do-files -global dir_do_validation "D:\Dasha\ESSEX\ESPON 2024\UK\regression_estimates\internal_validation\do_files" +global dir_do_validation "${dir_work}/internal_validation/do_files" * Directory for internal validation do-files -global dir_do_validation "D:\Dasha\ESSEX\ESPON 2024\UK\regression_estimates\internal_validation\do_files" +global dir_do_validation 
"${dir_work}/internal_validation/do_files" * Directory for internal validation do-files -global dir_validation_graphs "D:\Dasha\ESSEX\ESPON 2024\UK\regression_estimates\internal_validation\graphs" +global dir_validation_graphs "${dir_work}/internal_validation/graphs" global countyy "UK" @@ -108,9 +108,7 @@ do "${dir_do}/reg_wages.do" do "${dir_do}/reg_income.do" - - -/******************************************************************************* +******************************************************************************* * INTERNAL VALIDATION FILES ****************************************************************************** @@ -131,7 +129,7 @@ do "$dir_do_validation/int_val_retirement.do" do "$dir_do_validation/int_val_wages.do" do "$dir_do_validation/int_val_income.do" -*/ + /************************************************************************************** * END OF FILE **************************************************************************************/ diff --git a/input/InitialPopulations/compile/RegressionEstimates/reg_fertility.do b/input/InitialPopulations/compile/RegressionEstimates/reg_fertility.do index 95ed15194..c40bd79b5 100644 --- a/input/InitialPopulations/compile/RegressionEstimates/reg_fertility.do +++ b/input/InitialPopulations/compile/RegressionEstimates/reg_fertility.do @@ -1,9 +1,9 @@ -******************************************************************************** +********************************************************************************* * PROJECT: ESPON * SECTION: Fertility * OBJECT: Final Probit Models * AUTHORS: Daria Popova, Justin van de Ven -* LAST UPDATE: 26 Aug 2025 DP +* LAST UPDATE: 21 Oct 2025 DP * COUNTRY: UK * * NOTES: Simplified the fertility process for those in this initial @@ -37,7 +37,7 @@ putexcel set "$dir_results/reg_fertility", sheet("Info") replace putexcel A1 = "Description:" putexcel B1 = "Model parameters governing projection of fertility" putexcel A2 = "Authors: Patryk Bronka, Justin van de Ven, 
Daria Popova" -putexcel A3 = "Last edit: 1 July 2025 DP" +putexcel A3 = "Last edit: 3 Nov 2025 DP" putexcel A4 = "Process:", bold putexcel B4 = "Description:", bold @@ -49,7 +49,7 @@ putexcel B6 = "Probit regression estimates of probability of having a child for putexcel A10 = "Notes:", bold putexcel B10 = "All processes: replaced dhe with dhe_pcs and dhe_mcs, added ethnicity-4 cat (dot), covid dummies (y2020 y2021)" putexcel B11 = "F1a: only 24 obs having a child when in initial education spell, therefore have to take away some covariates to obtain estimate" - +putexcel B12 = "All processes: replaced dcpst with a dummy version (1=partnered 2=single)" putexcel set "$dir_results/reg_fertility", sheet("Gof") modify putexcel A1 = "Goodness of fit", bold @@ -63,9 +63,11 @@ xtset idperson swv * Process F1a: Probabiltiy of having a child * Sample: Women aged 18-44, in initial education spell education. * DV: New born child dummy (note that in the estimation sample dchpd contains the number of newborn children, which could be >1) - +tab sprfm dgn replace dchpd=1 if dchpd>1 & dchpd<. 
-// only 69 ppl meet the condition in total +replace dchpd = 0 if dchpd==-9 +tab2 swv dchpd, row + tab dchpd if (sprfm == 1 & ded == 1) /*///////////////////////////////////////////////////////////////////////////////////////////////// @@ -86,10 +88,9 @@ erase "${weight_checks}/weight_comparison_F1a.txt" //////////////////////////////////////////////////////////////////////////////////////////////////// */ -probit dchpd dag /*dhe dhe_mcs dhe_pcs*/ ib1.dcpst stm /*y2020 y2021*/ i.dot if /// +probit dchpd Dag /*dhe dhe_mcs dhe_pcs li.Dcpst_Single*/ Year_transformed /*y2020 y2021*/ Ethn_Asian Ethn_Black Ethn_Other if /// sprfm == 1 & ded == 1 [pweight=dimxwt], vce(robust) - * raw results matrix results = r(table) matrix results = results[1..6,1...]' @@ -112,95 +113,77 @@ scalar N = e(N) scalar chi2 = e(chi2) scalar ll = e(ll) +* Store results in Excel -* Results -* Note: Zeros eliminated - +* Store estimates matrix b = e(b) matrix V = e(V) +mata: + // Call matrices into mata + V = st_matrix("V") + b = st_matrix("b") -* Store variance-covariance matrix - -preserve - -putexcel set "$dir_raw_results/fertility/var_cov", sheet("var_cov") replace -putexcel A1 = matrix(V) - -import excel "$dir_raw_results/fertility/var_cov", sheet("var_cov") clear - -describe -local no_vars = `r(k)' + // Find which coefficients are nonzero + keep = (b :!= 0) -forvalues i = 1/2 { - egen row_sum = rowtotal(*) - drop if row_sum == 0 - drop row_sum - xpose, clear -} + // Eliminate zeros + b_trimmed = select(b, keep) + V_trimmed = select(V, keep) + V_trimmed = select(V_trimmed', keep)' + + // Inspection + b_trimmed + V_trimmed -mkmat v*, matrix(var) -putexcel set "$dir_results/reg_fertility", sheet("UK_F1a") modify -putexcel C2 = matrix(var) - -restore + // Return to Stata + st_matrix("b_trimmed", b_trimmed') + st_matrix("V_trimmed", V_trimmed) + st_matrix("nonzero_b_flag", keep) +end +* Export into Excel +putexcel set "$dir_results/reg_fertility", sheet("F1a") modify +putexcel B2 = 
matrix(b_trimmed) +putexcel C2 = matrix(V_trimmed) -* Store estimated coefficients -// Initialize a counter for non-zero coefficients -local non_zero_count = 0 -//local names : colnames b - -// Loop through each element in `b` to count non-zero coefficients -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - local non_zero_count = `non_zero_count' + 1 - } -} - -// Create a new row vector to hold only non-zero coefficients -matrix nonzero_b = J(1, `non_zero_count', .) - -// Populate nonzero_b with non-zero coefficients from b -local index = 1 -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - matrix nonzero_b[1, `index'] = b[1, `i'] - local index = `index' + 1 - } -} - -putexcel set "$dir_results/reg_fertility", sheet("UK_F1a") modify -putexcel A1 = matrix(nonzero_b'), names nformat(number_d2) +* Labelling +// Need to variable label when add new variable to model. Order matters. +local var_list Dag Year_transformed Ethn_Asian Ethn_Black Ethn_Other Constant + +putexcel A1 = "REGRESSOR" +putexcel B1 = "COEFFICIENT" +local i = 1 +foreach var in `var_list' { + local ++i + putexcel A`i' = "`var'" -* Labelling +} -putexcel A1 = "REGRESSOR" -putexcel A2 = "Dag" -putexcel A3 = "Dcpst_Single" -putexcel A4 = "Year_transformed" -putexcel A5 = "Ethn_Black" -putexcel A6 = "Ethn_Other" -putexcel A7 = "Constant" +local i = 2 +foreach var in `var_list' { + local ++i -putexcel B1 = "COEFFICIENT" -putexcel C1 = "Dag" -putexcel D1 = "Dcpst_Single" -putexcel E1 = "Year_transformed" -putexcel F1 = "Ethn_Black" -putexcel G1 = "Ethn_Other" -putexcel H1 = "Constant" + if `i' <= 26 { + local letter = char(64 + `i') // Convert 1=A, 2=B, ..., 26=Z + putexcel `letter'1 = "`var'" + } + else { + local first = char(64 + int((`i' - 1) / 26)) // First letter: A-Z + local second = char(65 + mod((`i' - 1), 26)) // Second letter: A-Z + putexcel `first'`second'1 = "`var'" // Correctly places AA-ZZ + } +} - -* Goodness of fit +* Export model fit statistics putexcel set 
"$dir_results/reg_fertility", sheet("Gof") modify -putexcel A3 = "F1a - Fertility in initial education spell", bold +putexcel A9 = "F1a - Fertility, in initial education spell", bold putexcel A5 = "Pseudo R-squared" putexcel B5 = r2_p @@ -212,7 +195,10 @@ putexcel E6 = "Log likelihood" putexcel F6 = ll drop in_sample p -scalar drop r2_p N chi2 ll +scalar drop r2_p N chi2 ll + + + ************************************************ * F1b - Having a child, left initial edu spell * @@ -245,9 +231,17 @@ erase "${weight_checks}/weight_comparison_F1b.txt" //////////////////////////////////////////////////////////////////////////////////////////////////// */ -probit dchpd dag dagsq li.ydses_c5 l.dnc l.dnc02 /*ib1.dhe*/ dhe_pcs dhe_mcs /*ib1.dcpst*/ /// - lib1.dcpst ib1.deh_c3 dukfr li.les_c3 ib8.drgn1 stm y2020 y2021 i.dot if /// - (sprfm == 1 & ded == 0) [pweight=dimxwt], vce(robust) +probit dchpd Dag Dag_sq Ydses_c5_Q2_L1 Ydses_c5_Q3_L1 Ydses_c5_Q4_L1 Ydses_c5_Q5_L1 /// + Dnc_L1 Dnc02_L1 /// + Dhe_pcs Dhe_mcs /// + Dcpst_Single_L1 /// + Deh_c3_Medium Deh_c3_Low /// + FertilityRate /// + Les_c3_Student_L1 Les_c3_NotEmployed_L1 /// + UKC UKD UKE UKF UKG UKH UKJ UKK UKL UKM UKN /// + Year_transformed Y2020 Y2021 Ethn_Asian Ethn_Black Ethn_Other /// +if (sprfm == 1 & ded == 0) [pweight = dimxwt], vce(robust) + * raw results matrix results = r(table) @@ -271,151 +265,84 @@ scalar N = e(N) scalar chi2 = e(chi2) scalar ll = e(ll) - -* Results -* Note: Zeros eliminated - -matrix b = e(b) -matrix V = e(V) - -* Store variance-covariance matrix +* Store results in Excel -preserve - -putexcel set "$dir_raw_results/fertility/var_cov", sheet("var_cov") replace -putexcel A1 = matrix(V) +* Store estimates +matrix b = e(b) +matrix V = e(V) -import excel "$dir_raw_results/fertility/var_cov", sheet("var_cov") clear +mata: + // Call matrices into mata + V = st_matrix("V") + b = st_matrix("b") -describe -local no_vars = `r(k)' + // Find which coefficients are nonzero + keep = (b :!= 0) 
-forvalues i = 1/2 { - egen row_sum = rowtotal(*) - drop if row_sum == 0 - drop row_sum - xpose, clear -} + // Eliminate zeros + b_trimmed = select(b, keep) + V_trimmed = select(V, keep) + V_trimmed = select(V_trimmed', keep)' + + // Inspection + b_trimmed + V_trimmed -mkmat v*, matrix(var) -putexcel set "$dir_results/reg_fertility", sheet("UK_F1b") modify -putexcel C2 = matrix(var) - -restore + // Return to Stata + st_matrix("b_trimmed", b_trimmed') + st_matrix("V_trimmed", V_trimmed) + st_matrix("nonzero_b_flag", keep) +end +* Export into Excel +putexcel set "$dir_results/reg_fertility", sheet("F1b") modify +putexcel B2 = matrix(b_trimmed) +putexcel C2 = matrix(V_trimmed) -* Store estimated coefficients +* Labelling +// Need to variable label when add new variable to model. Order matters. +local var_list Dag Dag_sq Ydses_c5_Q2_L1 Ydses_c5_Q3_L1 Ydses_c5_Q4_L1 Ydses_c5_Q5_L1 /// + Dnc_L1 Dnc02_L1 /// + Dhe_pcs Dhe_mcs /// + Dcpst_Single_L1 /// + Deh_c3_Medium Deh_c3_Low /// + FertilityRate /// + Les_c3_Student_L1 Les_c3_NotEmployed_L1 /// + UKC UKD UKE UKF UKG UKH UKJ UKK UKL UKM UKN /// + Year_transformed Y2020 Y2021 Ethn_Asian Ethn_Black Ethn_Other Constant + + +putexcel A1 = "REGRESSOR" +putexcel B1 = "COEFFICIENT" + +local i = 1 +foreach var in `var_list' { + local ++i + + putexcel A`i' = "`var'" + +} -// Initialize a counter for non-zero coefficients -local non_zero_count = 0 -//local names : colnames b +local i = 2 +foreach var in `var_list' { + local ++i -// Loop through each element in `b` to count non-zero coefficients -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - local non_zero_count = `non_zero_count' + 1 + if `i' <= 26 { + local letter = char(64 + `i') // Convert 1=A, 2=B, ..., 26=Z + putexcel `letter'1 = "`var'" } -} - -// Create a new row vector to hold only non-zero coefficients -matrix nonzero_b = J(1, `non_zero_count', .) 
- -// Populate nonzero_b with non-zero coefficients from b -local index = 1 -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - matrix nonzero_b[1, `index'] = b[1, `i'] - local index = `index' + 1 + else { + local first = char(64 + int((`i' - 1) / 26)) // First letter: A-Z + local second = char(65 + mod((`i' - 1), 26)) // Second letter: A-Z + putexcel `first'`second'1 = "`var'" // Correctly places AA-ZZ } } -putexcel set "$dir_results/reg_fertility", sheet("UK_F1b") modify -putexcel A1 = matrix(nonzero_b'), names nformat(number_d2) - - -* Labelling - -putexcel A1 = "REGRESSOR" -putexcel A2 = "Dag" -putexcel A3 = "Dag_sq" -putexcel A4 = "Ydses_c5_Q2_L1" -putexcel A5 = "Ydses_c5_Q3_L1" -putexcel A6 = "Ydses_c5_Q4_L1" -putexcel A7 = "Ydses_c5_Q5_L1" -putexcel A8 = "Dnc_L1" -putexcel A9 = "Dnc02_L1" -putexcel A10 = "Dhe_pcs" -putexcel A11 = "Dhe_mcs" -putexcel A12 = "Dcpst_Single_L1" -putexcel A13 = "Dcpst_PreviouslyPartnered_L1" -putexcel A14 = "Deh_c3_Medium" -putexcel A15 = "Deh_c3_Low" -putexcel A16 = "FertilityRate" -putexcel A17 = "Les_c3_Student_L1" -putexcel A18 = "Les_c3_NotEmployed_L1" -putexcel A19 = "UKC" -putexcel A20 = "UKD" -putexcel A21 = "UKE" -putexcel A22 = "UKF" -putexcel A23 = "UKG" -putexcel A24 = "UKH" -putexcel A25 = "UKJ" -putexcel A26 = "UKK" -putexcel A27 = "UKL" -putexcel A28 = "UKM" -putexcel A29 = "UKN" -putexcel A30 = "Year_transformed" -putexcel A31 = "Y2020" -putexcel A32 = "Y2021" -putexcel A33 = "Ethn_Asian" -putexcel A34 = "Ethn_Black" -putexcel A35 = "Ethn_Other" -putexcel A36 = "Constant" - -putexcel B1 = "COEFFICIENT" -putexcel C1 = "Dag" -putexcel D1 = "Dag_sq" -putexcel E1 = "Ydses_c5_Q2_L1" -putexcel F1 = "Ydses_c5_Q3_L1" -putexcel G1 = "Ydses_c5_Q4_L1" -putexcel H1 = "Ydses_c5_Q5_L1" -putexcel I1 = "Dnc_L1" -putexcel J1 = "Dnc02_L1" -putexcel K1 = "Dhe_pcs" -putexcel L1 = "Dhe_mcs" -putexcel M1 = "Dcpst_Single_L1" -putexcel N1 = "Dcpst_PreviouslyPartnered_L1" -putexcel O1 = "Deh_c3_Medium" -putexcel P1 = "Deh_c3_Low" 
-putexcel Q1 = "FertilityRate" -putexcel R1 = "Les_c3_Student_L1" -putexcel S1 = "Les_c3_NotEmployed_L1" -putexcel T1 = "UKC" -putexcel U1 = "UKD" -putexcel V1 = "UKE" -putexcel W1 = "UKF" -putexcel X1 = "UKG" -putexcel Y1 = "UKH" -putexcel Z1 = "UKJ" -putexcel AA1 = "UKK" -putexcel AB1 = "UKL" -putexcel AC1 = "UKM" -putexcel AD1 = "UKN" -putexcel AE1 = "Year_transformed" -putexcel AF1 = "Y2020" -putexcel AG1 = "Y2021" -putexcel AH1 = "Ethn_Asian" -putexcel AI1 = "Ethn_Black" -putexcel AJ1 = "Ethn_Other" -putexcel AK1 = "Constant" - - -* Goodness of fit - +* Export model fit statistics putexcel set "$dir_results/reg_fertility", sheet("Gof") modify -putexcel A9 = "F1b - Fertility left initial education spell", bold +putexcel A9 = "F1b - Fertility, left initial education spell", bold putexcel A11 = "Pseudo R-squared" putexcel B11 = r2_p @@ -428,7 +355,7 @@ putexcel F12 = ll drop in_sample p scalar drop r2_p N chi2 ll - - + + capture log close diff --git a/input/InitialPopulations/compile/RegressionEstimates/reg_home_ownership.do b/input/InitialPopulations/compile/RegressionEstimates/reg_home_ownership.do index 427c70485..d75449cd7 100644 --- a/input/InitialPopulations/compile/RegressionEstimates/reg_home_ownership.do +++ b/input/InitialPopulations/compile/RegressionEstimates/reg_home_ownership.do @@ -26,12 +26,11 @@ use "$dir_ukhls_data/ukhls_pooled_all_obs_09.dta", clear do "$dir_do/variable_update" -*sample selection +/*sample selection drop if dag < 16 - xtset idperson swv - +*/ * Set Excel file @@ -41,7 +40,7 @@ putexcel set "$dir_results/reg_home_ownership", sheet("Info") replace putexcel A1 = "Description:" putexcel B1 = "Model parameters governing projection of home ownership" putexcel A2 = "Authors: Patryk Bronka, Justin van de Ven, Daria Popova" -putexcel A3 = "Last edit: 1 July 2025 DP" +putexcel A3 = "Last edit: 4 Nov 2025 DP" putexcel A4 = "Process:", bold putexcel B4 = "Description:", bold @@ -51,6 +50,7 @@ putexcel B5 = "Probit regression estimates of 
the probability of being a home ow putexcel A10 = "Notes:", bold putexcel B10 = "Have combined dhhtp_c4 and lessp_c3 into a single variable with 8 categories, dhhtp_c8" putexcel B11 = "Added lagged home ownership, replaced dhe with dhe_pcs and dhe_mcs, added ethnicity (dot) and covid dummies (y2020 2021)" +putexcel B12 = "Re-estimated process at benefit unit level to be consistent with SimPaths" putexcel set "$dir_results/reg_home_ownership", sheet("Gof") modify putexcel A1 = "Goodness of fit", bold @@ -61,12 +61,13 @@ putexcel A1 = "Goodness of fit", bold ************************ * Process HO1a: Probability of being a home owner -* Sample: Individuals aged 18+ +* Sample: Individuals aged 18+ who are benefit unit heads * DV: Home ownerhip dummy +/* fre dhh_owned if dag >= 18 -/*///////////////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////////////// //check weights ////////////////////////////////////////////////////////////////////////////////// probit dhh_owned dgn dag dagsq il.dhhtp_c8 il.les_c3 /// i.deh_c3 /*il.dhe*/ l.dhe_mcs l.dhe_pcs il.ydses_c5 l.yptciihs_dv l.dhh_owned ib8.drgn1 stm y2020 y2021 i.dot if /// @@ -85,12 +86,97 @@ outreg2 using "${weight_checks}/weight_comparison_HO1a.xls", alpha(0.001, 0.01, erase "${weight_checks}/weight_comparison_HO1a.txt" //////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////// -*/ + probit dhh_owned dgn dag dagsq il.dhhtp_c8 il.les_c3 /// i.deh_c3 /*il.dhe*/ l.dhe_mcs l.dhe_pcs il.ydses_c5 l.yptciihs_dv l.dhh_owned ib8.drgn1 stm y2020 y2021 i.dot if /// dag>=18 [pweight=dimxwt], vce(cluster idperson) +*/ + +* DEFINE BENEFIT UNIT HEAD (AGED 18+) + +* Keep adults (18+) +keep if dag >= 18 + + +* Count unique benefit-unit–wave combinations BEFORE head 
selection +egen tag_bu_wave = tag(idbenefitunit swv) +count if tag_bu_wave +local n_bu_before = r(N) +display "Number of benefit unit–wave combinations BEFORE selecting head: `n_bu_before'" + + +* Sort benefit unit members within each wave: +* 1. Highest non-benefit income (ypnbihs_dv) +* 2. Highest age (dag) +* 3. Lowest idperson (idperson) +gsort idbenefitunit swv -ypnbihs_dv -dag idperson + +* Tag the first person (the "head") per benefit unit and wave +bysort idbenefitunit swv: gen benunit_head = (_n == 1) + +* Keep only benefit unit heads +keep if benunit_head == 1 + +* Count unique benefit-unit–wave combinations AFTER head selection +drop tag_bu_wave +egen tag_bu_wave = tag(idbenefitunit swv) +count if tag_bu_wave +local n_bu_after = r(N) +display "Number of benefit unit–wave combinations AFTER selecting head: `n_bu_after'" + +* Ensure benefit unit–wave counts match before and after head selection +assert `n_bu_before' == `n_bu_after' + +* Verify only one head per benefit unit per wave +by idbenefitunit swv, sort: gen n=_N +assert n==1 + +* Declare panel +xtset idperson swv + + +******************************************************************************** +* SET EXCEL OUTPUT FILES +******************************************************************************** + +* Info sheet +putexcel set "$dir_results/reg_home_ownership", sheet("Info") replace +putexcel A1 = "Description:" +putexcel B1 = "Model parameters governing projection of home ownership" +putexcel A2 = "Authors: Patryk Bronka, Justin van de Ven, Daria Popova" +putexcel A3 = "Last edit: 4 Nov 2025 DP" +putexcel A4 = "Process:", bold +putexcel B4 = "Description:", bold +putexcel A5 = "HO1a" +putexcel B5 = "Probit regression estimates of the probability of being a home owner, benefit unit heads aged 18+" + +putexcel A10 = "Notes:", bold +putexcel B10 = "Have combined dhhtp_c4 and lessp_c3 into a single variable with 8 categories, dhhtp_c8" +putexcel B11 = "Added lagged home ownership, replaced dhe 
with dhe_pcs and dhe_mcs, added ethnicity (dot) and covid dummies (y2020, y2021)" +putexcel B12 = "Re-estimated process at benefit unit level using heads defined by highest personal non-benefit income, or age, or lowest idperson" + +putexcel set "$dir_results/reg_home_ownership", sheet("Gof") modify +putexcel A1 = "Goodness of fit", bold + + +******************************************************************************** +* HO1a: Home ownership +******************************************************************************** + +probit dhh_owned Dgn Dag Dag_sq /// + Dhhtp_c8_2_L1 Dhhtp_c8_3_L1 Dhhtp_c8_4_L1 Dhhtp_c8_5_L1 Dhhtp_c8_6_L1 Dhhtp_c8_7_L1 Dhhtp_c8_8_L1 /// + Les_c3_Student_L1 Les_c3_NotEmployed_L1 /// + Deh_c3_Medium Deh_c3_Low /// + Dhe_mcs_L1 Dhe_pcs_L1 /// + Ydses_c5_Q2_L1 Ydses_c5_Q3_L1 Ydses_c5_Q4_L1 Ydses_c5_Q5_L1 /// + Yptciihs_dv_L1 /// + Dhh_owned_L1 /// + UKC UKD UKE UKF UKG UKH UKJ UKK UKL UKM UKN /// + Year_transformed Y2020 Y2021 Ethn_Asian Ethn_Black Ethn_Other /// + [pweight = dimxwt], vce(cluster idperson) + * raw results matrix results = r(table) @@ -177,90 +263,45 @@ putexcel A1 = matrix(nonzero_b'), names nformat(number_d2) * Labelling - +// Need to variable label when add new variable to model. Order matters. 
+local var_list Dgn Dag Dag_sq /// + Dhhtp_c8_2_L1 Dhhtp_c8_3_L1 Dhhtp_c8_4_L1 Dhhtp_c8_5_L1 Dhhtp_c8_6_L1 Dhhtp_c8_7_L1 Dhhtp_c8_8_L1 /// + Les_c3_Student_L1 Les_c3_NotEmployed_L1 /// + Deh_c3_Medium Deh_c3_Low /// + Dhe_mcs_L1 Dhe_pcs_L1 /// + Ydses_c5_Q2_L1 Ydses_c5_Q3_L1 Ydses_c5_Q4_L1 Ydses_c5_Q5_L1 /// + Yptciihs_dv_L1 /// + Dhh_owned_L1 /// + UKC UKD UKE UKF UKG UKH UKJ UKK UKL UKM UKN /// + Year_transformed Y2020 Y2021 Ethn_Asian Ethn_Black Ethn_Other /// + Constant + + putexcel A1 = "REGRESSOR" -putexcel A2 = "Dgn" -putexcel A3 = "Dag" -putexcel A4 = "Dag_sq" -putexcel A5 = "Dhhtp_c8_2_L1" -putexcel A6 = "Dhhtp_c8_3_L1" -putexcel A7 = "Dhhtp_c8_4_L1" -putexcel A8 = "Dhhtp_c8_5_L1" -putexcel A9 = "Dhhtp_c8_6_L1" -putexcel A10 = "Dhhtp_c8_7_L1" -putexcel A11 = "Dhhtp_c8_8_L1" -putexcel A12 = "Les_c3_Student_L1" -putexcel A13 = "Les_c3_NotEmployed_L1" -putexcel A14 = "Deh_c3_Medium" -putexcel A15 = "Deh_c3_Low" -putexcel A16 = "Dhe_mcs" -putexcel A17 = "Dhe_pcs" -putexcel A18 = "Ydses_c5_Q2_L1" -putexcel A19 = "Ydses_c5_Q3_L1" -putexcel A20 = "Ydses_c5_Q4_L1" -putexcel A21 = "Ydses_c5_Q5_L1" -putexcel A22 = "Yptciihs_dv_L1" -putexcel A23 = "Dhh_owned_L1" -putexcel A24 = "UKC" -putexcel A25 = "UKD" -putexcel A26 = "UKE" -putexcel A27 = "UKF" -putexcel A28 = "UKG" -putexcel A29 = "UKH" -putexcel A30 = "UKJ" -putexcel A31 = "UKK" -putexcel A32 = "UKL" -putexcel A33 = "UKM" -putexcel A34 = "UKN" -putexcel A35 = "Year_transformed" -putexcel A36 = "Y2020" -putexcel A37 = "Y2021" -putexcel A38 = "Ethn_Asian" -putexcel A39 = "Ethn_Black" -putexcel A40 = "Ethn_Other" -putexcel A41 = "Constant" - putexcel B1 = "COEFFICIENT" -putexcel C1 = "Dgn" -putexcel D1 = "Dag" -putexcel E1 = "Dag_sq" -putexcel F1 = "Dhhtp_c8_2_L1" -putexcel G1 = "Dhhtp_c8_3_L1" -putexcel H1 = "Dhhtp_c8_4_L1" -putexcel I1 = "Dhhtp_c8_5_L1" -putexcel J1 = "Dhhtp_c8_6_L1" -putexcel K1 = "Dhhtp_c8_7_L1" -putexcel L1 = "Dhhtp_c8_8_L1" -putexcel M1 = "Les_c3_Student_L1" -putexcel N1 = 
"Les_c3_NotEmployed_L1" -putexcel O1 = "Deh_c3_Medium" -putexcel P1 = "Deh_c3_Low" -putexcel Q1 = "Dhe_mcs" -putexcel R1 = "Dhe_pcs" -putexcel S1 = "Ydses_c5_Q2_L1" -putexcel T1 = "Ydses_c5_Q3_L1" -putexcel U1 = "Ydses_c5_Q4_L1" -putexcel V1 = "Ydses_c5_Q5_L1" -putexcel W1 = "Yptciihs_dv_L1" -putexcel X1 = "Dhh_owned_L1" -putexcel Y1 = "UKC" -putexcel Z1 = "UKD" -putexcel AA1 = "UKE" -putexcel AB1 = "UKF" -putexcel AC1 = "UKG" -putexcel AD1 = "UKH" -putexcel AE1 = "UKJ" -putexcel AF1 = "UKK" -putexcel AG1 = "UKL" -putexcel AH1 = "UKM" -putexcel AI1 = "UKN" -putexcel AJ1 = "Year_transformed" -putexcel AK1 = "Y2020" -putexcel AL1 = "Y2021" -putexcel AM1 = "Ethn_Asian" -putexcel AN1 = "Ethn_Black" -putexcel AO1 = "Ethn_Other" -putexcel AP1 = "Constant" + +local i = 1 +foreach var in `var_list' { + local ++i + + putexcel A`i' = "`var'" + +} + +local i = 2 +foreach var in `var_list' { + local ++i + + if `i' <= 26 { + local letter = char(64 + `i') // Convert 1=A, 2=B, ..., 26=Z + putexcel `letter'1 = "`var'" + } + else { + local first = char(64 + int((`i' - 1) / 26)) // First letter: A-Z + local second = char(65 + mod((`i' - 1), 26)) // Second letter: A-Z + putexcel `first'`second'1 = "`var'" // Correctly places AA-ZZ + } +} * Goodness of fit diff --git a/input/InitialPopulations/compile/RegressionEstimates/reg_leaveParentalHome.do b/input/InitialPopulations/compile/RegressionEstimates/reg_leaveParentalHome.do index 9a852ab65..9c37f0f22 100644 --- a/input/InitialPopulations/compile/RegressionEstimates/reg_leaveParentalHome.do +++ b/input/InitialPopulations/compile/RegressionEstimates/reg_leaveParentalHome.do @@ -41,7 +41,7 @@ putexcel set "$dir_work/reg_leaveParentalHome", sheet("Info") replace putexcel A1 = "Description:" putexcel B1 = "Model parameters governing leaving parental home" putexcel A2 = "Authors: Patryk Bronka, Justin van de Ven, Daria Popova" -putexcel A3 = "Last edit: 1 July 2025 DP" +putexcel A3 = "Last edit: 4 Nov 2025 DP" putexcel A4 = "Process:", bold 
putexcel B4 = "Description:", bold @@ -49,23 +49,26 @@ putexcel A5 = "P1a" putexcel B5 = "Probit regression estimates for leaving the parental home - 18+, not in intitial education spell, living with parents in t-1" putexcel A10 = "Notes:", bold -putexcel B10 = "Added: ethnicity-4 cat (dot); covid dummies (y2020 y2021); not partnered condition (dcpst != 1) to be consistent with the simulation" +putexcel B10 = "Added: ethnicity-4 cat (dot); covid dummies (y2020 y2021)" +putexcel B11 = "DV is synchronised with the adult child definition" putexcel set "$dir_work/reg_leaveParentalHome", sheet("Gof") modify putexcel A1 = "Goodness of fit", bold -************************************ -* Process P1a: Leave Parental Home * -************************************ - +******************************************************************************** +* Process P1a: Leave Parental Home +******************************************************************************** * Process P1a: Probability of leaving the parental home. 
-* Sample: All respondents living with a parent in t-1, aged 18+, not in initial +* Sample: All respondents adult child in t-1 and not currently in initial * education spell -* DV: Left parental home dummy of those who lived with parents in t-1 -* Note: Added not partnered condition as well to be consistent with the simulation -fre dlftphm if (ded == 0 & dag >= 18 & dcpst != 1) //3.65% - +* DV: Observed transitioning from adult child to non-adult child + +xtset idperson swv +//fre dlftphm if (ded == 0 & dag >= 18 & dcpst != 1) //3.65% +fre dlftphm if (ded == 0 & dag >= 18 ) +tab2 stm dlftphm if (ded == 0 & dag >= 18), r + /*///////////////////////////////////////////////////////////////////////////////////////////////// //check weights ////////////////////////////////////////////////////////////////////////////////// probit dlftphm i.dgn dag dagsq ib1.deh_c3 li.les_c3 li.ydses_c5 ib8.drgn1 stm y2020 y2021 i.dot /// @@ -84,9 +87,13 @@ erase "${weight_checks}/weight_comparison_P1a.txt" //////////////////////////////////////////////////////////////////////////////////////////////////// */ -probit dlftphm i.dgn dag dagsq ib1.deh_c3 li.les_c3 li.ydses_c5 ib8.drgn1 stm y2020 y2021 i.dot /// - if (ded==0 & dag>=18 & l.dlftphm==0 & dcpst != 1) [pweight=dimxwt], vce(robust) - +probit dlftphm Dgn Dag Dag_sq Deh_c3_Medium Deh_c3_Low /// + Les_c3_Student_L1 Les_c3_NotEmployed_L1 /// + Ydses_c5_Q2_L1 Ydses_c5_Q3_L1 Ydses_c5_Q4_L1 Ydses_c5_Q5_L1 /// + UKC UKD UKE UKF UKG UKH UKJ UKK UKL UKM UKN /// + Year_transformed Y2020 Y2021 Ethn_Asian Ethn_Black Ethn_Other /// + if (ded == 0 & dag >= 18 /*& dagpns!=1 & les_c4!=4*/ ) [pw = dimxwt], vce(robust) + * save raw results matrix results = r(table) @@ -145,98 +152,76 @@ putexcel C2 = matrix(var) restore -* Store estimated coefficients +* Store results in Excel -// Initialize a counter for non-zero coefficients -local non_zero_count = 0 -//local names : colnames b - -// Loop through each element in `b` to count non-zero coefficients 
-forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - local non_zero_count = `non_zero_count' + 1 - } -} +* Store estimates +matrix b = e(b) +matrix V = e(V) -// Create a new row vector to hold only non-zero coefficients -matrix nonzero_b = J(1, `non_zero_count', .) +mata: + // Call matrices into mata + V = st_matrix("V") + b = st_matrix("b") -// Populate nonzero_b with non-zero coefficients from b -local index = 1 -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - matrix nonzero_b[1, `index'] = b[1, `i'] - local index = `index' + 1 - } -} + // Find which coefficients are nonzero + keep = (b :!= 0) + + // Eliminate zeros + b_trimmed = select(b, keep) + V_trimmed = select(V, keep) + V_trimmed = select(V_trimmed', keep)' + + // Inspection + b_trimmed + V_trimmed + + // Return to Stata + st_matrix("b_trimmed", b_trimmed') + st_matrix("V_trimmed", V_trimmed) + st_matrix("nonzero_b_flag", keep) +end +* Export into Excel putexcel set "$dir_results/reg_leaveParentalHome", sheet("UK_P1a") modify -putexcel A1 = matrix(nonzero_b'), names //nformat(number_d2) +putexcel B2 = matrix(b_trimmed) +putexcel C2 = matrix(V_trimmed) + +* Labelling +// Need to variable label when add new variable to model. Order matters. 
+local var_list Dgn Dag Dag_sq /// + Deh_c3_Medium Deh_c3_Low /// + Les_c3_Student_L1 Les_c3_NotEmployed_L1 /// + Ydses_c5_Q2_L1 Ydses_c5_Q3_L1 Ydses_c5_Q4_L1 Ydses_c5_Q5_L1 /// + UKC UKD UKE UKF UKG UKH UKJ UKK UKL UKM UKN /// + Year_transformed Y2020 Y2021 Ethn_Asian Ethn_Black Ethn_Other /// + Constant -* Labeling - putexcel A1 = "REGRESSOR" -putexcel A2 = "Dgn" -putexcel A3 = "Dag" -putexcel A4 = "Dag_sq" -putexcel A5 = "Deh_c3_Medium" -putexcel A6 = "Deh_c3_Low" -putexcel A7 = "Les_c3_Student_L1" -putexcel A8 = "Les_c3_NotEmployed_L1" -putexcel A9 = "Ydses_c5_Q2_L1" -putexcel A10 = "Ydses_c5_Q3_L1" -putexcel A11 = "Ydses_c5_Q4_L1" -putexcel A12 = "Ydses_c5_Q5_L1" -putexcel A13 = "UKC" -putexcel A14 = "UKD" -putexcel A15 = "UKE" -putexcel A16 = "UKF" -putexcel A17 = "UKG" -putexcel A18 = "UKH" -putexcel A19 = "UKJ" -putexcel A20 = "UKK" -putexcel A21 = "UKL" -putexcel A22 = "UKM" -putexcel A23 = "UKN" -putexcel A24 = "Year_transformed" -putexcel A25 = "Y2020" -putexcel A26 = "Y2021" -putexcel A27 = "Ethn_Asian" -putexcel A28 = "Ethn_Black" -putexcel A29 = "Ethn_Other" -putexcel A30 = "Constant" - putexcel B1 = "COEFFICIENT" -putexcel C1 = "Dgn" -putexcel D1 = "Dag" -putexcel E1 = "Dag_sq" -putexcel F1 = "Deh_c3_Medium" -putexcel G1 = "Deh_c3_Low" -putexcel H1 = "Les_c3_Student_L1" -putexcel I1 = "Les_c3_NotEmployed_L1" -putexcel J1 = "Ydses_c5_Q2_L1" -putexcel K1 = "Ydses_c5_Q3_L1" -putexcel L1 = "Ydses_c5_Q4_L1" -putexcel M1 = "Ydses_c5_Q5_L1" -putexcel N1 = "UKC" -putexcel O1 = "UKD" -putexcel P1 = "UKE" -putexcel Q1 = "UKF" -putexcel R1 = "UKG" -putexcel S1 = "UKH" -putexcel T1 = "UKJ" -putexcel U1 = "UKK" -putexcel V1 = "UKL" -putexcel W1 = "UKM" -putexcel X1 = "UKN" -putexcel Y1 = "Year_transformed" -putexcel Z1 = "Y2020" -putexcel AA1 = "Y2021" -putexcel AB1 = "Ethn_Asian" -putexcel AC1 = "Ethn_Black" -putexcel AD1 = "Ethn_Other" -putexcel AE1 = "Constant" + +local i = 1 +foreach var in `var_list' { + local ++i + + putexcel A`i' = "`var'" + +} + +local i 
= 2 +foreach var in `var_list' { + local ++i + + if `i' <= 26 { + local letter = char(64 + `i') // Convert 1=A, 2=B, ..., 26=Z + putexcel `letter'1 = "`var'" + } + else { + local first = char(64 + int((`i' - 1) / 26)) // First letter: A-Z + local second = char(65 + mod((`i' - 1), 26)) // Second letter: A-Z + putexcel `first'`second'1 = "`var'" // Correctly places AA-ZZ + } +} * Goodness of fit diff --git a/input/InitialPopulations/compile/RegressionEstimates/variable_update.do b/input/InitialPopulations/compile/RegressionEstimates/variable_update.do index 84ceb66f4..e10372b10 100644 --- a/input/InitialPopulations/compile/RegressionEstimates/variable_update.do +++ b/input/InitialPopulations/compile/RegressionEstimates/variable_update.do @@ -49,9 +49,18 @@ replace dhhtp_c8 = 5 if dhhtp_c4 == 2 & lessp_c3 == 2 replace dhhtp_c8 = 6 if dhhtp_c4 == 2 & lessp_c3 == 3 replace dhhtp_c8 = 7 if dhhtp_c4 == 3 replace dhhtp_c8 = 8 if dhhtp_c4 == 4 -cap label define dhhtp_c8 1 "Couple with no children, spouse employed" 2 "Couple with no children, spouse student" 3 "Couple with no children, spouse not employed" 4 "Couple with children, spouse employed" 5 "Couple with children, spouse student" 6 "Couple with children, spouse not employed" 7 "Single with no children" 8 "Single with children" +cap label define dhhtp_c8 1 "Couple with no children, spouse employed" /// +2 "Couple with no children, spouse student" /// +3 "Couple with no children, spouse not employed" /// +4 "Couple with children, spouse employed" /// +5 "Couple with children, spouse student" /// +6 "Couple with children, spouse not employed" /// +7 "Single with no children" /// +8 "Single with children" label values dhhtp_c8 dhhtp_c8 +tab dhhtp_c8, gen(Dhhtp_c8_) + // Squared income variable cap cap gen ypnbihs_dv_sq = ypnbihs_dv^2 label variable ypnbihs_dv_sq "Personal Non-benefit Gross Income Squared" @@ -89,6 +98,13 @@ cap gen l_dhe_pcs = dhe_pcs[_n-1] if idperson == idperson[_n-1] & swv == swv[_n- cap gen l_dhe_mcs 
= dhe_mcs[_n-1] if idperson == idperson[_n-1] & swv == swv[_n-1] + 1 cap gen l_dlltsd = dlltsd[_n-1] if idperson == idperson[_n-1] & swv == swv[_n-1] + 1 cap gen l_dlltsd01 = dlltsd01[_n-1] if idperson == idperson[_n-1] & swv == swv[_n-1] + 1 +cap gen l_dnc = dnc[_n-1] if idperson == idperson[_n-1] & swv == swv[_n-1] + 1 +cap gen l_dnc02 = dnc02[_n-1] if idperson == idperson[_n-1] & swv == swv[_n-1] + 1 +cap gen l_dcpst = dcpst[_n-1] if idperson == idperson[_n-1] & swv == swv[_n-1] + 1 +cap gen l_dhhtp_c8 = dhhtp_c8[_n-1] if idperson == idperson[_n-1] & swv == swv[_n-1] + 1 +cap gen l_dhh_owned = dhh_owned[_n-1] if idperson == idperson[_n-1] & swv == swv[_n-1] + 1 +cap gen l_yptciihs_dv = yptciihs_dv[_n-1] if idperson == idperson[_n-1] & swv == swv[_n-1] + 1 + // Fill in missing lags using current values at age 16 gsort +idperson -stm @@ -291,12 +307,25 @@ rename L_Dhhtp_c4_2 Dhhtp_c4_CoupleChildren_L1 rename L_Dhhtp_c4_3 Dhhtp_c4_SingleNoChildren_L1 rename L_Dhhtp_c4_4 Dhhtp_c4_SingleChildren_L1 +tab l_dhhtp_c8, gen(L_Dhhtp_c8_) +forvalues i=1/8 { +rename L_Dhhtp_c8_`i' Dhhtp_c8_`i'_L1 +} + tab dot, gen(dot_) rename dot_1 Ethn_White rename dot_2 Ethn_Asian rename dot_3 Ethn_Black rename dot_4 Ethn_Other +tab dcpst, gen(Dcpst_) +rename Dcpst_1 Dcpst_Partnered +rename Dcpst_2 Dcpst_Single + +tab l_dcpst, gen(L_Dcpst_) +rename L_Dcpst_1 Dcpst_Partnered_L1 +rename L_Dcpst_2 Dcpst_Single_L1 + cap gen Year_transformed = stm @@ -321,5 +350,20 @@ cap gen Dlltsd01 = dlltsd01 cap gen Dlltsd_L1 = l_dlltsd cap gen Dlltsd01_L1 = l_dlltsd01 +cap gen FertilityRate = dukfr + +cap gen Dnc = dnc + +cap gen Dnc02 = dnc02 + +rename l_dnc Dnc_L1 + +rename l_dnc02 Dnc02_L1 + +gen Ypnbihs_dv = ypnbihs_dv +gen Yptciihs_dv = yptciihs_dv +gen Yptciihs_dv_L1 = l_yptciihs_dv +gen Dhh_owned = dhh_owned +gen Dhh_owned_L1 = l_dhh_owned diff --git a/input/InitialPopulations/compile/do_emphist/00_Master_emphist.do b/input/InitialPopulations/compile/do_emphist/00_Master_emphist.do new file mode 
100644 index 000000000..1492a7954 --- /dev/null +++ b/input/InitialPopulations/compile/do_emphist/00_Master_emphist.do @@ -0,0 +1,70 @@ +**************************************************************************************************** +* PROJECT: UKMOD update: construct a UKMOD-UKHLS database from UKHLS dataset +* DO-FILE NAME: 00_Master.do +* DESCRIPTION: Main do-file governing the creation of employment history data +* which is required for the generation of some UKMOD variables +* +* PURPOSE: The code reconstructs each respondent’s employment history month by month +* by combining information from the UKHLS and the older BHPS surveys. +* The scripts rebuild employment history using respondents’ reported current activity and interview dates across waves. +* The process links together: +* - the timing of interviews, +* - reported employment and non-employment spells, and +* - transitions between BHPS and UKHLS for legacy sample members. +* The result is a dataset showing, for every person, whether they were employed in each month +* since Jan 2007. +* +* +* The final output liwwh — the total number of months a person has been employed since January 2007. +* This provides a consistent measure of accumulated work experience over the observation window, +* suitable for use in UKMOD and labour-supply model. +* +* +* NOTES: Potentially the timeline could be extended backwards using data from +* the UKHLS Lifetime Employment Status History modules in Waves 1 and 5 +* which collected retrospective work histories from subsets of respondents. 
+* A sample scripts by Liam Wright are available but outdated: +* https://www.understandingsociety.ac.uk/documentation/mainstage/syntax/user-deposited-syntax/working-life-histories/ +*********************************************************************************************************** +* UKHLS VERSION: UKDA-6931 Special License version 2009-2023 +* AUTHORS: Nick Buck, Ricky Kanabar, Patryk Bronka, Daria Popova +* LAST REVISION: 30 Oct 2025 DP +*********************************************************************************************************** + +************************************************************************ +* Run sub-scripts +************************************************************************ +cd ${dir_data_emphist} +/* */ +* 01_Intdate.do: set up cross-wave file of interview dates +* ==> needed to link previous wave interview date to each respondent*/ +do ${dir_do_emphist}/01_Intdate.do + + +* 02_Lwintdat.do: create files of previous wave interview dates for waves c-n +* ==> helps align spells across waves for UKHLS respondents +do ${dir_do_emphist}/02_Lwintdat.do + +* 03_Bhps_lintdate.do: get last interview date under BHPS +* ==> also creates previous wave interview dates for wave b +do ${dir_do_emphist}/03_Bhps_lintdate.do + +* 04_Sp0_1_2a.do: create wave-specific spell files for everyone +* ==> each spell = period of employment/non-employment, continuous across months +* ==> Note: This does not pick up all possible variables from employment history, could be modified to pick up additional ones +do ${dir_do_emphist}/04_Sp0_1_2a.do + +* 05_Newentrant1.do: create spell file based on wave of entry (start of first job) +* ==> captures employment history for new entrants; fills gaps where possible +do ${dir_do_emphist}/05_Newentrant1.do + +* 06_Aspells1.do: create file containing all spells across waves +* ==> obtains spell start date from previous spell end date +* ==> fills some missing dates; drops cases with insufficient data +do 
${dir_do_emphist}/06_Aspells1.do + + +* 07_Empcal1a.do: create monthly employment calendar ==> used to calculate total months in employment per individual +do ${dir_do_emphist}/07_Empcal1a.do + + diff --git a/input/InitialPopulations/compile/do_emphist/01_Intdate.do b/input/InitialPopulations/compile/do_emphist/01_Intdate.do new file mode 100644 index 000000000..afae4c163 --- /dev/null +++ b/input/InitialPopulations/compile/do_emphist/01_Intdate.do @@ -0,0 +1,60 @@ +************************************************************************************************* +* PROJECT: UKMOD update – create employment history data from UKHLS +* FILE: 01_Intdate.do +* +* PURPOSE: +* Creates a *cross-wave file of interview dates* for all waves (a–n) of UKHLS. +* This file is later used in 02_Lwintdate.do to determine the *previous* +* interview date for each respondent. +* +* CONTEXT: +* - Reads xwaveid (cross-wave identifier file) and merges interview date +* variables from each wave’s individual response file (`_indresp`). +* - Excludes proxy interviews (where `ivfio > 1`). +* - Converts interview month/year into a continuous “months since 2009” +* variable (`_mns09`). 
+* +* OUTPUTS: +* - intdate.dta : combined dataset with interview dates and months-since-2009 +************************************************************************************************* + +cap log close +log using "${dir_log_emphist}/01_Intdate.log", replace + +* The list of waves is defined *globally* in 00_Master.do +local waves $UKHLSwaves // copy global into a local for use here +local n: word count `waves' // number of waves + +******************************************************************** +* MERGE INTERVIEW DATES FROM EACH WAVE +******************************************************************** +use ${dir_ukhls_data}/xwaveid, clear + +forvalues i = 1/`n' { + local w : word `i' of `waves' + + * Merge interview date variables from each wave’s individual response file +merge 1:1 pidp using ${dir_ukhls_data}/`w'_indresp , /// + keepusing(`w'_intdatd_dv `w'_intdatm_dv `w'_intdaty_dv) + + + * Exclude proxy interviews (ivfio > 1 means proxy or non-response) + replace `w'_intdatd_dv = . if `w'_ivfio > 1 + replace `w'_intdatm_dv = . if `w'_ivfio > 1 + replace `w'_intdaty_dv = . 
if `w'_ivfio > 1 + + drop _merge + + * Compute months since 2009 for timeline consistency + gen `w'_mns09 = 12 * (`w'_intdaty_dv - 2009) + `w'_intdatm_dv /// + if `w'_intdaty_dv > 0 & `w'_intdatm_dv > 0 + + //tab `w'_mns09 +} + + +save intdate, replace + + +clear +cap log close diff --git a/input/InitialPopulations/compile/do_emphist/02_Lwintdat.do b/input/InitialPopulations/compile/do_emphist/02_Lwintdat.do new file mode 100644 index 000000000..881fba914 --- /dev/null +++ b/input/InitialPopulations/compile/do_emphist/02_Lwintdat.do @@ -0,0 +1,123 @@ +************************************************************************************************* +* PROJECT: UKMOD update – create employment history data from UKHLS +* FILE: 02_Lwintdate.do +* +* PURPOSE: +* Creates variables identifying the *previous interview date* for each wave +* (from c to n) based on cross-wave interview date information. +* +* CONTEXT: +* - Uses the cross-wave dataset created in 01_Intdate.do. +* - For each wave, finds the respondent’s most recent previous interview +* (if any) and records its date (year, month, day) and wave number. +* - The first two waves (a, b) have no valid "previous" interview, so +* processing starts from wave c. 
+* +* OUTPUTS: +* - Variables: _lwint, _lintdaty, _lintdatm, _lintdatd +* - Files: _lint.dta (for each wave c–n) +************************************************************************************************* + +cap log close +log using "${dir_log_emphist}/02_Lwintdate.log", replace + +use intdate, clear + + +************************************************************************************************* +* DEFINE WAVES AND ASSOCIATED VARIABLES +************************************************************************************************* +local waves $UKHLSwaves // copy global into a local for use here + + +* Build lists of corresponding variable names for each wave +local rvars +local yvars +local mvars +local dvars + +foreach w of local waves { + local rvars "`rvars' `w'_ivfio" // fieldwork outcome (1 = full interview) + local yvars "`yvars' `w'_intdaty_dv" // interview year + local mvars "`mvars' `w'_intdatm_dv" // interview month + local dvars "`dvars' `w'_intdatd_dv" // interview day +} + +local nwaves : word count `waves' + + +************************************************************************************************* +* CREATE VARIABLES FOR PREVIOUS INTERVIEW DATES +************************************************************************************************* +forvalues w = 3/`nwaves' { // start from wave c + local curwave : word `w' of `waves' // current wave (e.g., "c") + local prevmax = `w' - 1 // number of prior waves + + di as text "Processing wave `curwave' (previous up to wave `prevmax')" + + * Initialise variables for this wave + gen `curwave'_lwint = 0 // previous wave index number + gen `curwave'_lintdaty = -9 // previous interview year + gen `curwave'_lintdatm = -9 // previous interview month + gen `curwave'_lintdatd = -9 // previous interview day + + * Check all earlier waves to find last valid interview + forvalues i = 1/`prevmax' { + local rw : word `i' of `rvars' + local yw : word `i' of `yvars' + local mw : word `i' of `mvars' 
+ local dw : word `i' of `dvars' + + * Replace if respondent was interviewed in both current and earlier wave + replace `curwave'_lwint = `i' if `curwave'_ivfio==1 & `rw'==1 + replace `curwave'_lintdaty = `yw' if `curwave'_ivfio==1 & `rw'==1 + replace `curwave'_lintdatm = `mw' if `curwave'_ivfio==1 & `rw'==1 + replace `curwave'_lintdatd = `dw' if `curwave'_ivfio==1 & `rw'==1 + } +} + + +************************************************************************************************* +* SAVE INTERMEDIATE DATASET WITH ALL WAVES +************************************************************************************************* +save intdate1, replace +drop if memorig==8 // exclude temporary or non-original household members + + +************************************************************************************************* +* EXPORT WAVE-SPECIFIC FILES +************************************************************************************************* +foreach w of local waves { + if inlist("`w'", "a", "b") continue // skip first two waves (no prior interviews) + + di as text "Saving previous interview data for wave `w'..." + + keep if `w'_ivfio==1 // respondents with valid interview + keep pidp `w'_lwint `w'_lintdaty `w'_lintdatm `w'_lintdatd + + save `w'_lint, replace // e.g., "c_lint.dta", "d_lint.dta", etc. 
+ + use intdate1, clear // reload full dataset for next wave + drop if memorig==8 +} + + + +clear +cap log close + + + + + + + + + + + + + + + + diff --git a/input/InitialPopulations/compile/do_emphist/03_Bhps_lintdate.do b/input/InitialPopulations/compile/do_emphist/03_Bhps_lintdate.do new file mode 100644 index 000000000..234b59b8e --- /dev/null +++ b/input/InitialPopulations/compile/do_emphist/03_Bhps_lintdate.do @@ -0,0 +1,98 @@ +/************************************************************************************************* +* PROJECT: UKMOD update – create employment history data from UKHLS & BHPS +* FILE: b_lint.do +* +* PURPOSE: +* Bridges the BHPS and UKHLS panels by identifying the *most recent BHPS interview* +* for each respondent before their first UKHLS interview (wave B). +* +* CONTEXT: +* - The BHPS (1991–2008) sample was incorporated into UKHLS starting in wave B (2009–10). +* - This script links the BHPS interview history to the first UKHLS observation +* so that employment and household histories remain continuous across the two panels. +* - It uses BHPS individual response data (waves L–R) and the combined UKHLS intdate file. 
+* +* OUTPUTS: +* - bhps_lint.dta : most recent BHPS interview date before UKHLS +* - b_lint.dta : previous interview info for wave B (merged BHPS or wave A) +*************************************************************************************************/ + +cap log close +log using "${dir_log_emphist}/03_Bhps_lintdate.log", replace + +/************************************************************************************************* +* BUILD BHPS LAST INTERVIEW FILE (1991–2008) +*************************************************************************************************/ + +use ${dir_bhps_data}/xwaveid_bh, clear + +gen lwint = 0 +gen lintdatd = 0 +gen lintdatm = 0 +gen lintdaty = 0 + +* Define BHPS waves included (update global once in master file) +local waves $BHPS_waves +local nwaves : word count `waves' + +forvalues i = 1/`nwaves' { + local w : word `i' of `waves' + + * Merge BHPS individual response data for this wave + merge 1:1 pidp using ${dir_bhps_data}/b`w'_indresp, /// + keepusing(b`w'_istrtdatd b`w'_istrtdatm b`w'_istrtdaty b`w'_ivfio) + + * Keep valid (non-proxy) interviews + replace lintdatd = b`w'_istrtdatd if b`w'_ivfio == 1 + replace lintdatm = b`w'_istrtdatm if b`w'_ivfio == 1 + replace lintdaty = b`w'_istrtdaty if b`w'_ivfio == 1 + replace lwint = `i' if b`w'_ivfio == 1 + + drop _merge +} + +keep if lwint > 0 +keep pidp lwint lintdatd lintdatm lintdaty +save bhps_lint, replace + + +/************************************************************************************************* +* LINK BHPS TO UKHLS WAVE B +*************************************************************************************************/ + +use intdate1, clear + +* Merge with BHPS last interview info +merge 1:1 pidp using bhps_lint +drop if _merge == 2 // BHPS-only cases (not in UKHLS) + +* Keep only those with full interviews in wave B +keep if b_ivfio == 1 + +tab memorig + +* Initialise +gen b_lwint = 0 +gen b_lintdaty = -9 +gen b_lintdatm = -9 +gen b_lintdatd = -9 + +* 
Link to UKHLS wave A (if available) +replace b_lwint = 1 if a_ivfio == 1 +replace b_lintdatd = a_intdatd_dv if a_ivfio == 1 +replace b_lintdatm = a_intdatm_dv if a_ivfio == 1 +replace b_lintdaty = a_intdaty_dv if a_ivfio == 1 + +* Replace with BHPS last interview info where available (merge==3) +replace b_lwint = lwint + 11 if _merge == 3 +replace b_lintdatd = lintdatd if _merge == 3 +replace b_lintdatm = lintdatm if _merge == 3 +replace b_lintdaty = lintdaty if _merge == 3 + +tab b_lwint + +keep pidp b_lwint b_lintdaty b_lintdatm b_lintdatd +save b_lint, replace + +clear +cap log close diff --git a/input/InitialPopulations/compile/do_emphist/04_Sp0_1_2a.do b/input/InitialPopulations/compile/do_emphist/04_Sp0_1_2a.do new file mode 100644 index 000000000..2df340a19 --- /dev/null +++ b/input/InitialPopulations/compile/do_emphist/04_Sp0_1_2a.do @@ -0,0 +1,223 @@ +/************************************************************************************************* +* PROJECT: UKMOD update – create employment history data from UKHLS +* FILE: 04_Sp0_1_2a.do +* +* PURPOSE: +* Constructs employment history “spells” for each UKHLS wave (b–n). 
+* For each wave, it: +* - Identifies employment/non-employment episodes and transitions +* - Determines start and end dates of each spell +* - Produces three datasets: sp0 (initial), sp1 (main), sp2 (reshaped) +* +* CONTEXT: +* - Uses previous interview information from ${wp}lint.dta +* - Requires individual respondent data from ${original_data}/${wp}indresp.dta +* +* OUTPUTS: +* - ${wp}sp0.dta : initial spell definitions +* - ${wp}sp1.dta : continuation spells +* - ${wp}sp2.dta : reshaped multi-episode structure +*************************************************************************************************/ + +cap log close +log using "${dir_log_emphist}/04_Sp0_1_2a.log", replace + + +local wps ${UKHLS_waves_prefixed} +local wvno ${UKHLS_panel_waves_numbers} + +local n : word count `wps' // number of waves to process + + +/************************************************************************************************* +* LOOP THROUGH EACH WAVE +*************************************************************************************************/ + +forvalues i = 1/`n' { + + global wp : word `i' of `wps' // wave prefix (e.g. b_, c_, etc.) + global wv : word `i' of `wvno' // wave numeric label + + di as text "------------------------------------------------------" + di as text "Processing wave ${wp} (numeric ${wv})..." 
+ di as text "------------------------------------------------------" + + + /************************************************************************************************* + * PREPARE INDRESP DATA AND MERGE WITH PREVIOUS INTERVIEW FILE + *************************************************************************************************/ + + use ${dir_ukhls_data}/${wp}indresp.dta, clear + keep if ${wp}ivfio == 1 // keep full interviews only + drop if ${wp}hhorig == 8 // drop non-original HH members + + merge 1:1 pidp using ${wp}lint + drop _merge + + rename ${wp}* * // remove wave prefix + + keep pidp jbsemp jbstat notempchk - nxtst nxtstelse - cjbatt /// + ff_ivlolw ff_emplw ff_jbsemp ff_jbstat intdatd_dv intdatm_dv intdaty_dv /// + lwint lintdaty lintdatm lintdatd + + + /************************************************************************************************* + * DEFINE EMPLOYMENT FLAGS AND END DATE VARIABLES + *************************************************************************************************/ + + gen aehhas = 1 + replace aehhas = 0 if empchk == -8 & notempchk == -8 + keep if aehhas == 1 + + gen enddatestat = 0 + replace enddatestat = 1 if empchk == 1 + replace enddatestat = 2 if notempchk == 1 & empchk != 1 + replace enddatestat = 3 if empchk == 2 + replace enddatestat = 4 if notempchk == 2 & empchk == -8 + replace enddatestat = 1 if enddatestat == 0 & empchk != -8 + replace enddatestat = 2 if enddatestat == 0 & notempchk != -8 + replace enddatestat = 5 if enddatestat == 1 & (jbsamr == 2 | samejob == 2) + + gen endday = intdatd_dv if enddatestat < 3 + gen endmonth = intdatm_dv if enddatestat < 3 + gen endyear = intdaty_dv if enddatestat < 3 + + replace endday = jbendd if enddatestat == 5 + replace endmonth = jbendm if enddatestat == 5 + replace endyear = jbendy4 if enddatestat == 5 + + replace endday = empstendd if inlist(enddatestat, 3, 4) + replace endmonth = empstendm if inlist(enddatestat, 3, 4) + replace endyear = empstendy4 if 
inlist(enddatestat, 3, 4) + + save ${wp}sp1a, replace // store intermediate version + + + /************************************************************************************************* + * CREATE SPELL 0 DATASET (INITIAL EPISODE) + *************************************************************************************************/ + + gen startday = lintdatd + gen startmonth = lintdatm + gen startyear = lintdaty + gen stdatestat = 1 + + gen espstat = jbstat + replace espstat = 1 if jbsemp == 2 + replace espstat = 2 if jbsemp == 1 + replace espstat = ff_jbstat if enddatestat == 4 + replace espstat = 1 if enddatestat == 3 & ff_jbsemp == 2 + replace espstat = 2 if enddatestat == 3 & ff_jbsemp == 1 + replace espstat = 2 if enddatestat == 5 & espstat > 2 + + gen wave = ${wv} + gen spell = 0 + + keep pidp wave spell lwint - espstat lintdatd lintdatm lintdaty intdatm_dv intdaty_dv + save ${wp}sp0, replace + + + /************************************************************************************************* + * CREATE SPELL 1 DATASET (CONTINUATION EPISODES) + *************************************************************************************************/ + + use ${wp}sp1a, clear + keep if enddatestat > 2 + + rename endday startday + rename endmonth startmonth + rename endyear startyear + gen stdatestat = 2 + + rename enddatestat edstat1 + + * Determine new end dates + gen enddatestat = 0 + replace enddatestat = 1 if cjob == 1 + replace enddatestat = 3 if cjob == 2 + replace enddatestat = 2 if cstat == 2 & enddatestat == 0 + replace enddatestat = 4 if cstat == 1 & enddatestat == 0 + replace enddatestat = 1 if enddatestat == 0 & jbsemp != -8 + replace enddatestat = 2 if enddatestat == 0 & jbsemp == -8 + + gen endday = intdatd_dv if enddatestat < 3 + gen endmonth = intdatm_dv if enddatestat < 3 + gen endyear = intdaty_dv if enddatestat < 3 + + replace endday = nxtjbendd if enddatestat == 3 + replace endmonth = nxtjbendm if enddatestat == 3 + replace endyear = nxtjbendy4 
if enddatestat == 3 + + replace endday = nxtstendd if enddatestat == 4 + replace endmonth = nxtstendm if enddatestat == 4 + replace endyear = nxtstendy4 if enddatestat == 4 + + gen espstat = jbstat if enddatestat == 2 + replace espstat = 1 if jbsemp == 2 & enddatestat == 1 + replace espstat = 2 if jbsemp == 1 & enddatestat == 1 + replace espstat = nxtstelse + 2 if enddatestat == 4 & nxtstelse > 0 + replace espstat = nxtstelse if enddatestat == 4 & nxtstelse > -8 & nxtstelse < 0 + replace espstat = 1 if enddatestat == 3 & nxtjbes == 2 + replace espstat = 2 if enddatestat == 3 & nxtjbes > -8 & nxtjbes < 2 + replace espstat = 2 if enddatestat == 1 & missing(espstat) + + gen wave = ${wv} + gen spell = 1 + + keep pidp wave spell lwint startday - espstat lintdatd lintdatm lintdaty intdatm_dv intdaty_dv + save ${wp}sp1, replace + + + /************************************************************************************************* + * CREATE SPELL 2 DATASET (RESHAPED MULTI-EPISODE STRUCTURE) + *************************************************************************************************/ + + use ${dir_ukhls_data}/${wp}indresp.dta, clear + rename ${wp}* * + keep if ivfio == 1 + drop if hhorig == 8 + + keep pidp nextstat* nextelse* currstat* nextjob* currjob* jobhours* statendd* statendm* statendy4* + + reshape long nextstat nextelse currstat nextjob currjob jobhours statendd statendm statendy4, i(pidp) j(sp2) + drop if nextstat == -8 + + quietly merge m:1 pidp using ${dir_ukhls_data}/${wp}indresp, /// + keepusing(${wp}intdatd_dv ${wp}intdatm_dv ${wp}intdaty_dv) + keep if _merge == 3 + drop _merge + + merge m:1 pidp using ${wp}lint + keep if _merge == 3 + drop _merge + rename ${wp}* * + + gen enddatestat = 0 + replace enddatestat = 1 if currjob == 1 + replace enddatestat = 3 if currjob == 2 + replace enddatestat = 4 if currstat == 1 + replace enddatestat = 2 if currstat > -8 & enddatestat == 0 + + gen endday = intdatd_dv + gen endmonth = intdatm_dv + gen endyear = 
intdaty_dv + + replace endday = statendd if enddatestat > 2 + replace endmonth = statendm if enddatestat > 2 + replace endyear = statendy4 if enddatestat > 2 + + gen espstat = nextstat + replace espstat = nextelse + 2 if nextelse > 0 + replace espstat = nextelse if nextstat == 2 & nextelse < 0 + replace espstat = 1 if nextjob == 1 + replace espstat = 2 if nextjob > 1 + replace espstat = 2 if nextjob > -8 & nextjob < 0 + + gen spell = sp2 + 1 + gen wave = ${wv} + + keep pidp spell wave endday endmonth endyear enddatestat espstat lintdatd lintdatm lintdaty intdatm_dv intdaty_dv + save ${wp}sp2, replace +} + +cap log close diff --git a/input/InitialPopulations/compile/do_emphist/05_Newentrant1.do b/input/InitialPopulations/compile/do_emphist/05_Newentrant1.do new file mode 100644 index 000000000..f8efe7ca9 --- /dev/null +++ b/input/InitialPopulations/compile/do_emphist/05_Newentrant1.do @@ -0,0 +1,191 @@ +/************************************************************************************************* +* PROJECT: UKMOD update – create employment history data from UKHLS +* FILE: 05_Newentrant1.do +* +* PURPOSE: +* Constructs “new entrant” employment spells for all available waves (A–latest). +* For each wave, identifies individuals who recently entered employment, +* infers start and end dates, and creates wave-specific spell files. +* +* NOTES: +* Patryk’s comment said that b_jbbgdat{y,m,d} were missing from earlier release data +* and obtained from Graham. It looks like they were added to the new release, +* therefore special treatment of wave B is no longer needed. 
+*************************************************************************************************/ + +cap log close +log using "${dir_log_emphist}/05_Newentrant1.log", replace + +/****************************************************************************** + * WAVE A: process separately (Nick Buck original logic) + ******************************************************************************/ +di as text "------------------------------------------------------" +di as text "Processing new entrant spells for wave a (numeric 1)" +di as text "------------------------------------------------------" + +capture use ${dir_ukhls_data}/a_indresp.dta, clear + + tab a_jbbgm a_jbsemp + tab a_jbbgy if a_jbbgm > 0 & a_jbsemp > 0 + tab a_jbbgy if a_jbbgm < 0 & a_jbsemp > 0 + tab a_jbhad + drop if a_ivfio==2 + rename a_* * + gen spell=0 + gen wave=1 + gen espstat=jbstat + replace espstat=1 if jbsemp==2 + replace espstat=2 if jbsemp==1 + tab espstat jbsemp + gen endyear=intdaty_dv + gen endmonth=intdatm_dv + gen endday=intdatd_dv + gen startmonth=-9 + gen startyear=-9 + gen startday=-9 + replace startyear=jbbgy if jbsemp > 0 + replace startmonth=jbbgm if jbsemp > 0 + replace startday=jbbgd if jbsemp > 0 + tab jlendm jbhad + replace startyear=jlendy if jbhad==1 + replace startmonth=jlendm if jbhad==1 + replace startday=1 if jbhad==1 + tab jbhad jbsemp + tab jbstat if jbhad==2 + replace startyear=2007 if jbhad==2 + replace startmonth=1 if jbhad==2 + replace startday=1 if jbhad==2 + tab startyear + tab jbstat if startyear < 0 + replace startyear=2007 if startyear < 0 + tab startyear if startmonth < 0 + replace startmonth=1 if startmonth < 0 + tab espstat + tab jbstat + gen ne=1 + keep pidp spell wave espstat endyear endmonth endday startmonth startyear startday intdaty_dv intdatm_dv ne + save a_sp0_ne, replace + + di as text "Saved a_sp0_ne.dta successfully." + + +/****************************************************************************** + * LOOP THROUGH WAVES (b ... 
n) using master globals + ******************************************************************************/ +local wps ${UKHLS_waves_prefixed} +local wvno ${UKHLS_panel_waves_numbers} + +local n : word count `wps' + +forvalues i = 1/`n' { + local wp : word `i' of `wps' // prefix e.g. b_ + local wn : word `i' of `wvno' // numeric e.g. 2 + + di as text "------------------------------------------------------" + di as text "Processing new entrant spells for wave `wp' (numeric `wn')" + di as text "------------------------------------------------------" + + use ${dir_ukhls_data}/`wp'indresp.dta, clear + + * harmonise variable names (=remove wave prefix) + rename `wp'* * + + /************************************************************************************************* + * CHECK THAT REQUIRED VARIABLES EXIST + * (if some essential vars missing, warn and skip) + *************************************************************************************************/ + local reqvars "pidp ivfio hhorig notempchk empchk jbsemp jbstat intdaty_dv intdatm_dv intdatd_dv jbhad jlendy jlendm jbbgy jbbgm jbbgd" + local missing_vars + + foreach v of local reqvars { + capture confirm variable `v' + if _rc local missing_vars "`missing_vars' `v'" + } + + if "`missing_vars'" != "" { + di as error "WARNING: Missing variables in wave `wp': `missing_vars'" + di as text "Skipping this wave..." 
+ continue + } + + /************************************************************************************************* + * BASIC SUMMARY FOR DIAGNOSTICS + *************************************************************************************************/ + di as text "Variable overview for wave `wp':" + summarize jbsemp jbstat jbhad jbbgy jbbgm jbbgd jlendy jlendm + + /************************************************************************************************* + * FILTER AND PROCESS + *************************************************************************************************/ + drop if ivfio == 2 // exclude proxy interviews + capture confirm variable hhorig + if !_rc { + drop if hhorig == 8 // exclude temporary members (if variable exists) + } + + gen aehhas = 1 + replace aehhas = 0 if notempchk == -8 & empchk == -8 + keep if aehhas == 0 + + * Fill jbbg values if missing + capture confirm variable jbbgdaty + if !_rc { + replace jbbgy=jbbgdaty if jbbgy < 0 & jbbgdaty > 0 & jbbgdaty != . + replace jbbgm=jbbgdatm if jbbgm < 0 & jbbgdatm > 0 & jbbgdatm != . + replace jbbgd=jbbgdatd if jbbgd < 0 & jbbgdatd > 0 & jbbgdatd != . 
+ } + + gen spell = 0 + gen wave = `wn' + + * Define employment status at spell end + gen espstat = jbstat + replace espstat = 1 if jbsemp == 2 + replace espstat = 2 if jbsemp == 1 + + gen endyear = intdaty_dv + gen endmonth = intdatm_dv + gen endday = intdatd_dv + + * Default missing start dates + gen startyear = -9 + gen startmonth = -9 + gen startday = -9 + + * Fill start date from job start info (if employed) + replace startyear = jbbgy if jbsemp > 0 + replace startmonth = jbbgm if jbsemp > 0 + replace startday = jbbgd if jbsemp > 0 + + * For those who had a job previously (jbhad == 1) + replace startyear = jlendy if jbhad == 1 + replace startmonth = jlendm if jbhad == 1 + replace startday = 1 if jbhad == 1 + + * If no job since 2007, assign default early date + replace startyear = 2007 if jbhad == 2 | startyear < 0 + replace startmonth = 1 if jbhad == 2 | startmonth < 0 + replace startday = 1 if jbhad == 2 | startday < 0 + + * Flag for new entrant + gen ne = 1 + + /************************************************************************************************* + * SAVE WAVE-SPECIFIC SPELL FILE + *************************************************************************************************/ + keep pidp spell wave espstat endyear endmonth endday /// + startmonth startyear startday intdaty_dv intdatm_dv ne + + save `wp'sp0_ne, replace + + di as text "Saved `wp'sp0_ne.dta successfully." +} + +/****************************************************************************** + * END + ******************************************************************************/ +di as text "------------------------------------------------------" +di as text "All available waves processed. Check logs for warnings." 
+di as text "------------------------------------------------------" + +cap log close diff --git a/input/InitialPopulations/compile/do_emphist/06_Aspells1.do b/input/InitialPopulations/compile/do_emphist/06_Aspells1.do new file mode 100644 index 000000000..f0bcf2913 --- /dev/null +++ b/input/InitialPopulations/compile/do_emphist/06_Aspells1.do @@ -0,0 +1,160 @@ +/************************************************************************************************* +* PROJECT: UKMOD update – create employment history data from UKHLS +* FILE: 06_Aspells1.do +* +* PURPOSE: +* Combines all wave-specific employment spell files (sp0, sp0_ne, sp1, sp2) +* into a single dataset covering all available waves. +* Derives consistent start and end dates, imputes missing dates, +* and removes invalid or inconsistent spells. +* +* NOTES: +* - BHPS-origin members receive approximate start dates if missing. +*************************************************************************************************/ + +cap log close +log using "${dir_log_emphist}/06_Aspells1.log", replace + +/************************************************************************************************* + * INITIALISE AND APPEND SPELL FILES + *************************************************************************************************/ + +di as text "------------------------------------------------------" +di as text "Combining wave-specific spell files into one dataset" +di as text "------------------------------------------------------" + +* Start with wave a +use a_sp0_ne, clear + +* Loop through later waves using global list from master file +local wps ${UKHLS_waves_prefixed} +local n : word count `wps' + +forvalues i = 1/`n' { + local wp : word `i' of `wps' + di as text "Appending spell files for wave `wp'..." 
+ + capture append using `wp'sp0 + capture append using `wp'sp0_ne + capture append using `wp'sp1 + capture append using `wp'sp2 +} + +di as text "All wave-specific spell files appended successfully." + +/************************************************************************************************* + * MERGE WITH CROSS-WAVE IDENTIFIER + *************************************************************************************************/ + +di as text "Merging with xwaveid file to obtain memorig variable..." +merge m:1 pidp using ${dir_ukhls_data}/xwaveid, keepusing(memorig) +keep if _merge == 3 +drop _merge + +/************************************************************************************************* + * IMPUTE MISSING BHPS INTERVIEW DATES + *************************************************************************************************/ + +di as text "Applying BHPS date fix for legacy members..." +gen bhps = 0 +replace bhps = 1 if memorig > 2 & memorig < 7 + +replace startyear = 2008 if bhps == 1 & startyear == -9 +replace startmonth = 9 if bhps == 1 & startmonth == -9 +replace startday = 1 if bhps == 1 & startday == -9 + +/************************************************************************************************* + * ADJUST START AND END DATES USING INTERVIEW TIMING + *************************************************************************************************/ + +di as text "Adjusting spell dates relative to previous and current interviews..." 
+ +gen valdat1 = 0 +replace valdat1 = 1 if lintdaty > 0 & lintdatm > 0 & endyear > 0 & endmonth > 0 +gen durat1 = 12 * (endyear - lintdaty) + (endmonth - lintdatm) if valdat1 == 1 + +gen valdat2 = 0 +replace valdat2 = 1 if lintdaty > 0 & lintdatm > 0 & startyear > 0 & startmonth > 0 +gen durat2 = 12 * (startyear - lintdaty) + (startmonth - lintdatm) if valdat2 == 1 + +replace endyear = lintdaty if durat1 < 0 +replace endmonth = lintdatm if durat1 < 0 +replace endday = lintdatd if durat1 < 0 +replace startyear = lintdaty if durat2 < 0 +replace startmonth = lintdatm if durat2 < 0 +replace startday = lintdatd if durat2 < 0 + +/************************************************************************************************* + * FILL START DATES FROM PREVIOUS SPELLS + *************************************************************************************************/ + +sort pidp wave spell +replace startyear = endyear[_n-1] if spell > 1 +replace startmonth = endmonth[_n-1] if spell > 1 +replace startday = endday[_n-1] if spell > 1 + +/************************************************************************************************* + * COMPUTE MIDPOINT DATES (FOR MISSING VALUES) + *************************************************************************************************/ + +gen lint00 = 12 * (lintdaty - 2000) + lintdatm if lintdaty > 0 & lintdatm > 0 +gen int00 = 12 * (intdaty_dv - 2000) + intdatm_dv if intdaty_dv > 0 & intdatm_dv > 0 +gen interval = int00 - lint00 +gen mint00 = lint00 + round(interval / 2) +gen midyear = 2000 + int(mint00 / 12) +gen midmonth = mint00 - 12 * int(mint00 / 12) +replace midyear = midyear - 1 if midmonth == 0 +replace midmonth = 12 if midmonth == 0 + +/************************************************************************************************* + * MANUAL IMPUTATIONS FOR PARTIAL MISSING MONTHS + *************************************************************************************************/ + +replace endmonth = 1 if endyear == 
intdaty_dv & endmonth < 0 +replace endmonth = 12 if endyear == lintdaty & endmonth < 0 +replace endmonth = 6 if endyear > lintdaty & endyear < intdaty_dv & endmonth < 0 + +replace startmonth = endmonth[_n-1] if spell == 1 & startmonth < 0 & startyear > 0 & endmonth[_n-1] > 0 + +gen valstart = (startmonth > 0 & startyear > 0) +gen valend = (endmonth > 0 & endyear > 0) + +replace startmonth = midmonth if valstart == 0 & midmonth != . +replace startyear = midyear if valstart == 0 & midyear != . +replace endmonth = midmonth if valend == 0 & midmonth != . +replace endyear = midyear if valend == 0 & midyear != . + +/************************************************************************************************* + * COMPUTE SPELL DURATION AND VALIDATION + *************************************************************************************************/ + +gen valdat = (startyear > 0 & startmonth > 0 & endyear > 0 & endmonth > 0) +gen durat = 12 * (endyear - startyear) + (endmonth - startmonth) if valdat == 1 + +save allspells1, replace + +/************************************************************************************************* + * FILTER AND CLEAN SPELLS + *************************************************************************************************/ + +use allspells1, clear + +gen d2 = (valdat == 0) +bys pidp: egen nd2 = sum(d2) +tab nd2 + +keep if nd2 == 0 +keep if durat >= 0 +drop if durat == . 
+drop if espstat < 0 + +save allspells1ok, replace + +/************************************************************************************************* + * END + *************************************************************************************************/ +di as text "------------------------------------------------------" +di as text "All spells processed and saved as allspells1ok.dta" +di as text "------------------------------------------------------" + +cap log close diff --git a/input/InitialPopulations/compile/do_emphist/07_Empcal1a.do b/input/InitialPopulations/compile/do_emphist/07_Empcal1a.do new file mode 100644 index 000000000..8dc71ee78 --- /dev/null +++ b/input/InitialPopulations/compile/do_emphist/07_Empcal1a.do @@ -0,0 +1,324 @@ +/************************************************************************************************* +* PROJECT: UKMOD update – create employment calendar and per-wave employment history +* FILE: 07_Empcal1a.do +* +* PURPOSE: +* - Build a monthly employment calendar (2007 onward) from all employment spells. +* - Derive per-wave employment history variables needed for UKMOD. +* - Output one per-wave file (b_emphist, ..., n_emphist) with summary measures. +* +* INPUTS: +* allspells1ok.dta - individual-level employment spells constructed in 06_Aspells1.do +* ${original_data}\_indresp.dta - wave-specific interview response data +* +* OUTPUTS: +* ${data}\_emphist.dta - per-wave employment history summary files +* ${data}\temp_liwwh.dta - long file with all waves appended +*************************************************************************************************/ +local baseyr 2007 //==> All subsequent month indexing is relative to January 2007. 
+ +use allspells1ok, clear // Load the prepared spell data + +*-------------------------------------------------------------* +* Convert start and end dates into months since base year (Jan 2007) +*-------------------------------------------------------------* + +gen stmy07 = 12 * (startyear - `baseyr') + startmonth +gen enmy07 = 12 * (endyear - `baseyr') + endmonth + +*-------------------------------------------------------------* +* Simplified employment status: 2 = employed, 1 = not employed +*-------------------------------------------------------------* +fre espstat +gen emp=1 if espstat > 2 +replace emp=2 if espstat < 3 +tab espstat emp + +*-------------------------------------------------------------* +* Determine full observed month range +*-------------------------------------------------------------* +summ enmy07, meanonly +local maxm = r(max) +local minm = 1 // start from month 1 to avoid negatives + +di as txt "Detected month range: " as res "`minm'–`maxm' (" as res `=`maxm'-`minm'+1' " months total)" + +*-------------------------------------------------------------* +* Generate monthly employment indicators (esp# = status each month) +*-------------------------------------------------------------* +forvalues i = `minm'/`maxm' { + gen esp`i' = 0 + replace esp`i' = emp if `i' >= stmy07 & `i' <= enmy07 +} + +*-------------------------------------------------------------* +* Collapse multiple spells per person (2 overrides 1) +*-------------------------------------------------------------* +forvalues i = `minm'/`maxm' { + bys pidp: egen memp`i' = max(esp`i') +} + +*-------------------------------------------------------------* +* Keep one row per person and retain key variables +*-------------------------------------------------------------* +bys pidp: gen seq = _n +keep if seq == 1 +keep pidp memorig memp`minm'-memp`maxm' + +/*-------------------------------------------------------------* +* Count employed months per financial year (April–March) ==> not 
sure if this is needed so commented out for now +*-------------------------------------------------------------* +summ memp*, meanonly + +local fy_start = `baseyr' +local fy_end = floor(`baseyr' + (`maxm' + 8) / 12) // +8 ensures FY covers Apr–Mar + +forvalues y = `fy_start'/`fy_end' { + local fy = substr("`y'",3,2) // e.g. 2007 → "07" + local start = (12 * (`y' - `baseyr')) + 4 // April of FY + local end = (12 * (`y' - `baseyr' + 1)) + 3 // March next year + + * Clip to observed range + if `start' < `minm' local start = `minm' + if `end' > `maxm' local end = `maxm' + + * Count months employed (status = 2) + gen efy`fy' = 0 + forvalues i = `start'/`end' { + replace efy`fy' = efy`fy' + 1 if memp`i' == 2 + } + + di as txt "FY" `y' "/" `= `y'+1' " → months " as res "`start'–`end'" +} +*/ + +save empcal1a, replace +/*we end up with a monthly calendar of activity (i.e. employed or not) for each individual from Jan 2007*/ + +/************************************************************************************************* + Derive wave-specific employment history summaries + -------------------------------------------------------------------- + For each wave, merge with interview date, calculate employment duration + up to that interview month (liwwh), and short-term employment indicators: + empmonth - months employed in 6 months before interview + mismonth - months missing in last 6 months + empmonth12 - months employed in 12 months before interview +*************************************************************************************************/ + +local waves $UKHLS_panel_waves +//local waves b + +foreach w of local waves { + + di "---------------------------------------------------------" + di "Processing WAVE `w' ..." 
+ di "---------------------------------------------------------" + + use empcal1a, clear + + merge 1:1 pidp using ${dir_ukhls_data}/`w'_indresp, /// + keepusing(`w'_intdatm_dv `w'_intdaty_dv `w'_ivfio) + keep if _merge == 3 + drop _merge + drop if `w'_ivfio == 2 // exclude proxy interviews + + * Interview month index relative to base year (Jan 2007) + gen inmy07 = 12*(`w'_intdaty_dv - `baseyr') + `w'_intdatm_dv + + + *------------------------------------------ + * Total months employed up to interview + *------------------------------------------ + gen liwwh = 0 + summarize inmy07, meanonly + local maxm = r(max) + forvalues i = 1/`maxm' { + replace liwwh = liwwh + 1 if memp`i' == 2 & `i' <= inmy07 + } + + *------------------------------------------ + * Short-term employment summaries + *------------------------------------------ + summarize inmy07, meanonly + local maxm = r(max) + local start6m = `maxm' - 6 + local start12m = `maxm' - 12 + + gen empmonth = 0 + gen mismonth = 0 + gen empmonth12 = 0 + + forvalues i = `start6m'/`maxm' { + replace empmonth = empmonth + 1 if memp`i' == 2 & inmy07 == `maxm' + replace mismonth = mismonth + 1 if memp`i' == 0 & inmy07 == `maxm' + } + forvalues i = `start12m'/`maxm' { + replace empmonth12 = empmonth12 + 1 if memp`i' == 2 & inmy07 == `maxm' + } + + *------------------------------------------ + * Keep and label key variables + *------------------------------------------ + keep pidp `w'_intdatm_dv `w'_intdaty_dv liwwh empmonth mismonth empmonth12 //efy1 efy2 + label var liwwh "Total months in employment up to current interview" + label var empmonth "Months employed in last 6 months before interview" + label var mismonth "Months missing in last 6 months before interview" + label var empmonth12 "Months employed in last 12 months before interview" + + save `w'_emphist, replace +} + +di as txt "All waves (B–N) processed successfully." 
+ +*------------------------------------------ +* Combine per-wave employment history files +*------------------------------------------ + +* Convert global into a local list and remove first letter (because we start from wave c) +local waves $UKHLS_panel_waves +local first : word 1 of `waves' +local waves : list waves - first + +display "Waves to append: `waves'" + +use b_emphist, clear +gen wave = "b" + +foreach w of local waves { + display "Appending wave `w'..." + append using `w'_emphist, generate(flag_`w') + replace wave = "`w'" if flag_`w' == 1 + drop flag_`w' +} + +* generate wave identifier +gen swv = . + +local letters $UKHLS_panel_waves +local numbers $UKHLS_panel_waves_numbers + +local n : word count `letters' +forval i = 1/`n' { + local wv : word `i' of `letters' + local num : word `i' of `numbers' + replace swv = `num' if wave == "`wv'" +} + +gen idperson=pidp + +save temp_liwwh.dta, replace + +duplicates report swv idperson +bys swv: sum liwwh + +cap log close + +/************************************************************************************** +* clean-up and exit +*************************************************************************************/ + +#delimit ; +local files_to_drop +allspells1.dta +a_sp0_ne.dta +bhps_lint.dta +b_emphist.dta +b_lint.dta +b_sp0.dta +b_sp0_ne.dta +b_sp1.dta +b_sp1a.dta +b_sp2.dta +c_emphist.dta +c_lint.dta +c_sp0.dta +c_sp0_ne.dta +c_sp1.dta +c_sp1a.dta +c_sp2.dta +d_emphist.dta +d_lint.dta +d_sp0.dta +d_sp0_ne.dta +d_sp1.dta +d_sp1a.dta +d_sp2.dta +e_emphist.dta +e_lint.dta +e_sp0.dta +e_sp0_ne.dta +e_sp1.dta +e_sp1a.dta +e_sp2.dta +f_emphist.dta +f_lint.dta +f_sp0.dta +f_sp0_ne.dta +f_sp1.dta +f_sp1a.dta +f_sp2.dta +g_emphist.dta +g_lint.dta +g_sp0.dta +g_sp0_ne.dta +g_sp1.dta +g_sp1a.dta +g_sp2.dta +h_emphist.dta +h_lint.dta +h_sp0.dta +h_sp0_ne.dta +h_sp1.dta +h_sp1a.dta +h_sp2.dta +intdate.dta +intdate1.dta +i_emphist.dta +i_lint.dta +i_sp0.dta +i_sp0_ne.dta +i_sp1.dta +i_sp1a.dta +i_sp2.dta +j_emphist.dta 
+j_lint.dta +j_sp0.dta +j_sp0_ne.dta +j_sp1.dta +j_sp1a.dta +j_sp2.dta +k_emphist.dta +k_lint.dta +k_sp0.dta +k_sp0_ne.dta +k_sp1.dta +k_sp1a.dta +k_sp2.dta +l_emphist.dta +l_lint.dta +l_sp0.dta +l_sp0_ne.dta +l_sp1.dta +l_sp1a.dta +l_sp2.dta +m_emphist.dta +m_lint.dta +m_sp0.dta +m_sp0_ne.dta +m_sp1.dta +m_sp1a.dta +m_sp2.dta +n_emphist.dta +n_lint.dta +n_sp0.dta +n_sp0_ne.dta +n_sp1.dta +n_sp1a.dta +n_sp2.dta + ; +#delimit cr // cr stands for carriage return + +foreach file of local files_to_drop { + erase "$dir_data_emphist/`file'" +} + diff --git a/input/reg_fertility.xlsx b/input/reg_fertility.xlsx index 27644860a..d36df9aa3 100644 Binary files a/input/reg_fertility.xlsx and b/input/reg_fertility.xlsx differ diff --git a/input/reg_home_ownership.xlsx b/input/reg_home_ownership.xlsx index b272ac25a..25b15ec1b 100644 Binary files a/input/reg_home_ownership.xlsx and b/input/reg_home_ownership.xlsx differ diff --git a/input/reg_labourSupplyUtility.xlsx b/input/reg_labourSupplyUtility.xlsx index 8d8403ce9..b9ccb4585 100644 Binary files a/input/reg_labourSupplyUtility.xlsx and b/input/reg_labourSupplyUtility.xlsx differ diff --git a/input/reg_leaveParentalHome.xlsx b/input/reg_leaveParentalHome.xlsx index b41d9e2e9..39723eb58 100644 Binary files a/input/reg_leaveParentalHome.xlsx and b/input/reg_leaveParentalHome.xlsx differ