* ==============================================================================
* Prepare01: builds the analysis file for project C19a from the stset data.
*
* Reads  : $datafilepath/AW_C19a_Stset        (already stset survival data)
* Writes : $datafilepath/AW_C19a_Prepare01    (analysis file)
*          $logfilepath/StataLog_AW_...        (text log of this run)
* Needs  : globals $logfilepath, $datafilepath, $outputpath set by the caller.
*
* Steps  : restrict the population, derive covariates (region, marital status,
*          country-of-birth groups, sex, education, income tercile), split
*          follow-up into one-year age bands, flag the last episode per person
*          for logistic-regression sensitivity analyses, and build combined
*          grouping variables for descriptive tables.
* ==============================================================================

capture log close _all
clear

* Define some variables
local today=subinstr("`c(current_date)'"," ","",.)
local time=subinstr("`c(current_time)'",":","",.)

* Stata version compatibility
version 16.0

* Log file number
local vnum="1_13"
* Syntax name
local syntax "Prepare01"
* Project name
local project "C19a"

* Open log file
log using "$logfilepath/StataLog_AW_`project'_`syntax'_`vnum'_`today'_`time'", replace text name(Merge`syntax')
local logname=r(name)

* Time the whole run. The timer must be switched on (and off again before
* listing), otherwise -timer list- has nothing to report.
timer clear 1
timer on 1

* Change Log
********************************
* 1.01: Added lan dummy for Stockholm region
* 1.03: Added egen groupSexDispink5=group(kon dispink2018_5), label
* 1.04: Added drop if tpop_oldnew==1
* 1.05: Corrected sun3
* 1.06: Adapted age split to ages after 98 (now to max 110)
* 1.07: Added more groups to combinations (Sun3lmic dispink3lmic)
* 1.08-1.09: Added comments for public extraction
* 1.10: added drop for missing lmic and dispink
* 1.11: Added mena
* 1.11: Added episode identifier to allow for logistic regression in sensitivity analyses on same data
* 1.12: Added categorical variable for age to allow for more descriptives by age
* 1.13: Code improvements to increase reliability

clear
use "$datafilepath/AW_`project'_Stset"

* Lowest age (in years) from which follow-up is split into one-year bands
local lar=20

* Count those who die on first day of observation period
count if ddatecod==mdy(3,12,2020)

* Drop those who are not in survival analyses
* (should be the same as those who died on first day of observation period)
drop if _st==0

* Base pop
count

* Drop those who are in totalpopulation2020 but not in the original
* totalpopulation of the data.
* Covid deaths are based on the original totalpopulation
drop if tpop_oldnew==1
count

* Generate dummy for covid deaths to distinguish models between all-cause and
* covid-only models
generate byte cod_cov=0
replace cod_cov=1 if covid==1

* Dummy for Stockholm region based on existing lan variable end of 2019
gen stolan=0
replace stolan=1 if lan2019==1
label define stolan 0 "Non Stockholm" 1 "Stockholms Lan", replace
lab val stolan stolan

* Marital status based on existing variable end of 2019
gen marital_status=.
replace marital_status=0 if civil2019=="OG"
replace marital_status=1 if civil2019=="G" | civil2019=="RP"
replace marital_status=2 if civil2019=="S" | civil2019=="SP"
replace marital_status=3 if civil2019=="Ä" | civil2019=="EP"
label define marital_status 0 "Never Married" 1 "Married" 2 "Sep/Div" 3 "Widowed", replace
lab val marital_status marital_status

* Country of birth grouping
* High, low-middle income countries, World Bank definition
/*Generate high vs low, middle income countries*/
gen lmic = .
replace lmic = 0 if birthcountry==1
replace lmic = 1 if birthcountry>=2 & birthcountry<=22
replace lmic = 1 if inlist(birthcountry, 24, 42)
* Everything else that is known goes to LMIC. Stata treats missing as larger
* than any number, so "birthcountry!=99" alone is TRUE for a missing
* birthcountry; the explicit !missing() keeps such rows at lmic==. so that they
* are removed by the drop below instead of being counted as LMIC.
replace lmic = 2 if lmic==. & !missing(birthcountry) & birthcountry!=99 & birthcountry!=1
/*Pick out early countries from the first if statement included in HIC*/
replace lmic = 2 if inlist(birthcountry, 6, 7, 16, 17, 18)
label define lmic 0 "Sweden" 1 "HIC" 2 "LMIC", modify
label variable lmic "HIC vs LMIC, World Bank definitions"
label values lmic lmic
tab birthcountry lmic, m

* Drop those with missing country of birth
drop if lmic==.
count

/* Notes - because of the pre-existing country groupings, there are some small ambiguities
   e.g. Croatia is HIC, but part of Former Yugoslavia, most of which is LMIC
   e.g. Uruguay is HIC, but part of South America, most of which is LMIC */

/*Generate extended definition to differentiate MENA*/
/*World Bank definition*/
gen mena = .
replace mena = 0 if birthcountry==1
replace mena = 1 if birthcountry>=2 & birthcountry<=22
replace mena = 1 if inlist(birthcountry, 24, 42)
* Same missing-value guard as for lmic above
replace mena = 2 if mena==. & !missing(birthcountry) & birthcountry!=99 & birthcountry!=1
/*Pick out early countries from the first if statement included in HIC*/
replace mena = 2 if inlist(birthcountry, 6, 7, 16, 17, 18)
replace mena = 3 if inlist(birthcountry, 30, 31, 32, 33, 34, 35)
label define mena 0 "Sweden" 1 "HIC" 2 "LMIC other" 3 "LMIC MENA", modify
label variable mena "HIC vs LMIC w/MENA World Bank definitions"
label values mena mena
tab birthcountry mena, m

* Drop those with missing country of birth
* (should affect 0 individuals as done in lmic)
drop if mena==.
count

* Generate sex variable from existing variable from birth register
* NOTE(review): the labels below assume -encode- maps the first (sorted) value
* of kon to men and the second to women - confirm against the coding of kon.
encode kon, gen(sex)
label define sex 1 "Men" 2 "Women", replace
lab val sex sex

* Generate education variables based on last existing known education (end of 2018)
* sun3 starts at 9 ("Missing"); any sun2018 value not mapped below, including
* missing, therefore stays 9.
destring sun2018, replace
gen byte sun3=9
replace sun3=1 if inlist(sun2018, 1, 2)
replace sun3=2 if sun2018==3
replace sun3=3 if inlist(sun2018, 4, 5, 6)
label define sun3 1 "primary" 2 "Secondary" 3 "Post-Second" 9 "Missing", replace
lab val sun3 sun3

* Generate income variables based on last existing known tercile of income (end of 2018)
xtile dispink2018_3=dispink2018, nq(3)

* Drop those with missing income
drop if dispink2018_3==.

* Age split for every age after age lar
* dateage`lar' is the date of the `lar'-th birthday; follow-up after that date
* is cut into consecutive 365-day bands (38325 = 105 * 365).
gen age=.
gen dateage`lar'=mdy(month(bdate), day(bdate), year(bdate)+`lar')
stsplit splitage, at(0(365)38325) after(time=dateage`lar')
* splitage==-1 marks time before the `lar'-th birthday
replace age=0 if splitage==-1
replace age=(splitage/365)+`lar' if splitage>=0

gen agecat=.
replace agecat=1 if age >=`lar' & age<=49
replace agecat=2 if age >=50 & age<=69
replace agecat=3 if age >=70 & age<=79
replace agecat=4 if age >=80 & age<=89
replace agecat=5 if age >=90 & age!=.
label define agecat 1 "20-49" 2 "50-69" 3 "70-79" 4 "80-89" 5 "90+", replace
lab val agecat agecat

* Identify first episode for later deletion to allow logistic regression on same data
* logistReg==1 flags the last episode of each person (one row per person).
sort lopnr _t0
by lopnr: gen episodeX=_n
by lopnr: gen episodeZ=_N
gen logistReg=1
replace logistReg=0 if episodeX!=episodeZ
* For persons with more than one episode the age of the last episode is
* reduced by one year.
gen logistage=age
replace logistage=age-1 if episodeX==episodeZ & episodeX!=1
drop episodeX episodeZ

gen agecatlogist=.
replace agecatlogist=1 if logistage>=`lar' & logistage<=29
replace agecatlogist=2 if logistage>=30 & logistage<=34
replace agecatlogist=3 if logistage>=35 & logistage<=39
replace agecatlogist=4 if logistage>=40 & logistage<=44
replace agecatlogist=5 if logistage>=45 & logistage<=49
replace agecatlogist=6 if logistage>=50 & logistage<=54
replace agecatlogist=7 if logistage>=55 & logistage<=59
replace agecatlogist=8 if logistage>=60 & logistage<=64
replace agecatlogist=9 if logistage>=65 & logistage<=69
replace agecatlogist=10 if logistage>=70 & logistage<=74
replace agecatlogist=11 if logistage>=75 & logistage<=79
replace agecatlogist=12 if logistage>=80 & logistage<=84
replace agecatlogist=13 if logistage>=85 & logistage<=89
replace agecatlogist=14 if logistage>=90 & logistage<=94
replace agecatlogist=15 if logistage>=95 & logistage<=99
replace agecatlogist=16 if logistage>=100 & logistage!=.
* Only the row used for logistic regression carries an age category
replace agecatlogist=. if logistReg==0
label define agecatlogist 1 "20-29" 2 "30-34" 3 "35-39" 4 "40-44" 5 "45-49" 6 "50-54" 7 "55-59" 8 "60-64" 9 "65-69" 10 "70-74" 11 "75-79" 12 "80-84" 13 "85-89" 14 "90-94" 15 "95-99" 16 "100+", replace
lab val agecatlogist agecatlogist

* Create the combination of some variables for descriptive statistics
egen groupSexSun3=group(kon sun3), label
egen groupSexDispink=group(kon dispink2018_3), label
egen groupSexMaritalStatus=group(kon marital_status), label
egen groupSexlmic=group(kon lmic), label
egen groupSexmena=group(kon mena), label
egen groupSexStolan=group(kon stolan), label
egen groupDispink3lmic=group(dispink2018_3 lmic), label
egen groupSun3lmic=group(sun3 lmic), label

* Create by agecat sex and variables for even more descriptive statistics
egen groupSexagecatSun3=group(kon agecat sun3), label
egen groupSexagecatDispink=group(kon agecat dispink2018_3), label
egen groupSexagecatMaritalStatus=group(kon agecat marital_status), label
egen groupSexagecatlmic=group(kon agecat lmic), label
egen groupSexagecatmena=group(kon agecat mena), label
egen groupSexagecatStolan=group(kon agecat stolan), label

cd "$outputpath"

* Save file
save "$datafilepath/AW_`project'_`syntax'", replace

* Report elapsed run time and close the log opened above
timer off 1
timer list 1
log close `logname'