***************************************************************** Program: UK1983Code.sas Programmer: Aaron Lai and Oiching Yeung Client: CTUS, University of Oxford Project: UK 1983 conversion to MTUS 5.53, 5.8 and 6.0 Purpose: Process the UK 1983 data Remark: *********************************************************************; libname uksas 'E:\oxford\MTUS\UK\sas\newteam'; options nofmterr ; *** print out the contents ***; proc contents data=uksas.bigdat; run; data qnn (index=(hh_id)); set uksas.qnnarie2 (keep= dem1 dem2 dem176 hh:) ; hh_id = compress(put(dem1,z4.0))||'_'||compress(put(dem2,z4.0)); drop dem1 dem2 ; run ; data demography (keep = hh_id diary_start_dt dem1-dem204 hh:) ; set uksas.bigdat ; hh_id = compress(put(dem1,z4.0))||'_'||compress(put(dem2,z4.0)); set qnn key=hh_id / unique; format diary_start_dt yymmdd10.; * dem105 is the month; * dem104 is the day; if dem105 ge 9 then diary_start_dt = mdy(dem105, dem104,1983); if dem105 lt 9 then diary_start_dt = mdy(dem105, dem104,1984); run ; proc sort data=uksas.demography nodupkey ; by hh_id ; run ; *** adding additional variables from newqnnarie ; * pick up variables from new file; data newqnn; set uksas.newqnnarie; hh_id = compress(put(dem1,z4.0))||'_'||compress(put(dem2,z4.0)); keep hh_id aglftsch disab health ocombwt region urban vehicle hh1-hh22; run; proc sql; create table uksas.demography as select distinct * from demography a left join newqnn b on a.hh_id = b.hh_id order by hh_id; quit; *********************************************************** create table for relationship such as parents and spouse ID ***********************************************************; proc means data=uksas.demography(keep=dem1 dem2) n noprint ; by dem1 ; var dem2 ; output out=hh_nr (where=(_stat_='N') drop=_type_ dem2 ); run ; data hh_nr (index=(dem1)) ; set hh_nr ; rename _freq_ = hh_nr ; drop _stat_ ; run ; proc sort data=uksas.demography (keep=hh_id dem1 dem113 dem116 dem117) out=rel ; by dem1 dem113 ; run ; *** create rel to contain HH needs processing and single HH that process differently; data rel singleHH; set rel; set hh_nr key=dem1 / unique ; if hh_nr > 1 then output rel; * eliminate single member hh *; else output singleHH; * single HH; if dem113 not in (50,98,99,40,13) ; * eliminate unrelated individuals. Grandparent without children tec.*; if dem1 not in (674,1262,1341) ; * Kids only hh */ *** temp change to dem113 to correct mis-labeling ***; if dem1 in (357, 489, 655, 1151, 1412) and dem113=1 then dem113=0 ; run ; data rel0 (index=(dem1)) /* spouse 1 */ rel1 (index=(dem1)) /* spouse 0 */ ; set rel (where=(dem113 in (0,1))) ; by dem1 ; if dem113=0 then output rel0 ; else if dem113=1 then output rel1 ; run ; data rel11 (index=(dem1)) ; /* parents to householder */ set rel (where=(dem113 in (11,12))) ; by dem1 ; run ; /* * output relationship for checking; proc freq data=rel ; tables dem113 ; run; */ *** generate relationship variables *; data relationship (keep = hh_id parent1-parent2 spouse index=(hh_id)) ; set rel ; where dem113 not in (11,12) ; length parent1 parent2 spouse $9. ; parent1 = ' '; parent2 = ' '; spouse = ' '; * self; if dem113 = 0 then do ; set rel1 (keep=dem1 hh_id rename=(hh_id=spouse)) key=dem1 / unique ; if _iorc_ ne 0 then do ; _error_=0 ; spouse= ' ' ; end ; *** there is no household with 2 parents to hhdr ***; set rel11 (keep=dem1 hh_id rename=(hh_id=parent1)) key=dem1 / unique ; if _iorc_ ne 0 then do ; _error_=0 ; parent1= ' ' ; end ; end ; * spouse; if dem113 = 1 then do ; set rel0 (keep=dem1 hh_id rename=(hh_id=spouse)) key=dem1 / unique ; if _iorc_ ne 0 then do ; _error_=0 ; spouse= ' ' ; end ; *** there is no household with 2 parents to spouse ***; set rel11 (keep=dem1 hh_id rename=(hh_id=parent1)) key=dem1 / unique ; if _iorc_ ne 0 then do ; _error_=0 ; parent1= ' ' ; end ; end ; * son/daughter; if dem113 in (21, 22) then do ; set rel0 (keep=dem1 hh_id rename=(hh_id=p)) key=dem1 / unique ; if _iorc_=0 then parent1=p ; set rel1 (keep=dem1 hh_id rename=(hh_id=p)) key=dem1 / unique ; if _iorc_=0 then parent2=p; else _error_=0 ; end ; * brothers; if dem113=30 then do ; spouse = ' ' ; set rel11 (keep=dem1 hh_id rename=(hh_id=parent1)) key=dem1 / unique ; if _iorc_ ne 0 then do ; _error_=0 ; parent1= ' ' ; end ; end ; *** hhdr has child and grandchild ***; if dem113=23 then do ; spouse=' ' ; parent2=' ' ; if dem1=1598 then parent1 = '1598_0003' ; if dem1=1782 then parent1 = '1782_0003' ; end ; if parent1= ' ' and parent2=' ' and spouse=' ' then delete ; run ; *********************************************************** create table for single parent ***********************************************************; proc sql; create table oneparent as select distinct hh_id, parent1 from relationship where parent1 ne ' ' and parent2 = ' ' ; create table spouse as select distinct hh_id from relationship where spouse ne ' ' ; create table single1parent as select distinct hh_id from uksas.demography where hh_id not in (select hh_id from spouse) and hh_id in (select parent1 from oneparent) ; quit; data single1parent (index=(hh_id)); set single1parent; run; *********************************************************** create table for HHtype ***********************************************************; data coupleonly (index=(hh_id)) ; set relationship (where = (parent1 = ' ' and parent2 = ' ' and spouse ne ' ')); keep hh_id ; run; data coupleplus (index=(hh_id)) ; set relationship (where=(spouse ne ' ')); keep hh_id ; run; proc sql; create table coupleonly as select distinct hh_id from coupleonly order by hh_id; create table coupleplus as select distinct hh_id from coupleplus order by hh_id; quit; proc sort data = uksas.demography (keep = dem1 hh_id dem47) out=demo; by hh_id; run; data hhtype1; merge demo (in=a) coupleonly (in=b) coupleplus (in=c); by hh_id; if a; h1 = 0; h2 = 0; h3 = 0; h4 = 0; if dem47 = 1 then h1 = 1; * check for couple only; if b and dem47 = 2 then h2 = 1; * check for couple plus; if c and dem47 > 2 then h3 = 1; if h1 + h2 + h3 = 0 then h4 = 1; run; * find out the HH level hhtype; proc sql; create table maxtype as select dem1, count(*) as n, sum(h1) as h1, sum(h2) as h2 , sum(h3) as h3, sum(h4) as h4 from hhtype1 group by dem1 order by dem1; quit; data hhtype (index=(dem1)) ; set maxtype; hhtype = 4; if h1 > 0 and sum(of h2-h4) = 0 then hhtype = 1; if h2 > 0 and sum(of h3-h4) = 0 then hhtype = 2; if h3 > 0 then hhtype = 3; run; *********************************************************** create table for unmarried child ***********************************************************; data cphome (index=(hh_id)); set relationship (where=((parent1 ne ' ' or parent2 ne ' ') and spouse = ' ')); run; *********************************************************** create table for youngest diarist ***********************************************************; proc sql; create table youngest as select dem1, min(dem111) as minage from uksas.demography group by dem1 order by dem1 ; quit; data youngest (index=(dem1)); set youngest; run; *********************************************************** create table for demographic variables ***********************************************************; data uksas.family_stru_out; set uksas.demography; countreg = 37; survey = 1983; swave = 0; msamp = 7; * HH ID is a derived field based on a combination of dem1 (HH ID) and dem2 (person ID); hldid = dem1; persid = dem2; hhldsize = dem47; * bring in parent ID and spouse ID; length parent1 parent2 spouse $ 9; parent1 = ''; parent2 = ''; spouse = ''; set relationship key=hh_id / unique ; if _iorc_ = 0 then do; parntid1 = substr(parent1,9,1)*1; parntid2 = substr(parent2,9,1)*1; partID = substr(spouse,9,1)*1; if parntid1 = . and parntid2 ne . then do; parntid1 = parntid2; parntid2 = -8; end; if parntid1 = . then parntid1 = -7; if parntid2 = . then parntid2 = -7; if partid = . and dem116 in (1,2) then partid = -8; if partid = . then partid = -7; end; if _iorc_ ne 0 then do; parntid1 = -7; parntid2 = -7; if dem116 in (1,2) then partid = -8; else partid = -7; end; * income translation; * HH income are coded in non-order set and we need to rearrange it in order to find out the grouped income; * the decoding table is on p36 of 2170_mrdoc_pdf_a2170uab.pdf; * dem52: 97-refuse, 98-cannot estimate, 99-N/A; hhincome = dem52; if dem52 eq 5 then hhincome = 1; if dem52 eq 2 then hhincome = 2; if dem52 eq 11 then hhincome = 3; if dem52 eq 6 then hhincome = 4; if dem52 eq 10 then hhincome = 5; if dem52 eq 8 then hhincome = 6; if dem52 eq 1 then hhincome = 7; if dem52 eq 9 then hhincome = 8; if dem52 eq 4 then hhincome = 9; if dem52 eq 7 then hhincome = 10; if dem52 eq 12 then hhincome = 11; if dem52 eq 3 then hhincome = 12; * HH type; * we have only coded those cases with almost certainty for hh type 1-3; set hhtype (keep=dem1 hhtype) key=dem1 / unique ; if _iorc_ ne 0 then hhtype = 4 ; * number of children and youngest person; set youngest key=dem1 / unique ; if _iorc_ ne 0 then young = 0 ; else young = minage; nchild = 0; nchild = sum(of hh2-hh6); if young < 18 and nchild eq 0 then nchild = 1; if hh2 > 0 then young = 1; if hh2 = 0 and hh3 > 0 then young = 3; if sum(of hh2-hh3) = 0 and hh4 > 0 then young = 8; if sum(of hh2-hh4) = 0 and hh5 > 0 then young = 12; if sum(of hh2-hh5) = 0 and hh6 > 0 then young = 16; agekidx = -7; if hh2 > 0 or hh3 > 0 then agekidx = 1; if hh2 eq 0 and hh3 eq 0 and hh4 > 0 then agekidx = 2; if sum(of hh2-hh4) eq 0 and (hh5 > 0 or hh6 > 0) then agekidx = 3; if young ge 18 then agekidx = 4; * manually fix an mis-assign; if hldid = 1407 then agekidx = 3; agekid2 = young; * HH income is a coded field with valid values from 1-12, 99 is missing as stated in doc; * here we assume 97 and 98 are also considered as missing; * see the earlier part for income ordering; incorig = -8; if dem52 lt 97 then incorig = dem52; income = -8; if hhincome le 3 then income = 1; if hhincome gt 3 and hhincome le 8 then income = 2; if hhincome gt 8 then income = 3; ownhome = 3; if dem17 eq 1 or dem17 eq 2 then ownhome = 1; if dem17 eq 3 or dem17 eq 4 then ownhome = 2; computer = 0; if dem63 eq 2 then computer = 1; sex = -8; if dem112 eq 1 then sex = 1; if dem112 eq 2 then sex = 2; * manually fix some missing sex; if hh_id = '1227_0003' then sex = 1 ; if hh_id = '1407_0003' then sex = 2 ; if hh_id = '1744_0001' then sex = 1 ; if hh_id = '2310_0003' then sex = 1 ; age = dem111; if dem111 gt 80 then age = 80; *** family status and related information; famstat = -8; * adjustment with number of child info; if age ge 18 and age le 39 and nchild eq 0 then famstat = 0; if age ge 18 and (hh2 > 0 or hh3 > 0) then famstat = 1; if age ge 18 and hh2 eq 0 and hh3 eq 0 and nchild > 0 then famstat = 2; if age ge 40 and nchild eq 0 then famstat = 3; if age < 18 and dem113 in (21,22) then famstat = 4; if age < 18 and dem113 not in (21,22) then famstat = 5; * unmarried child living with parents; cphome = 0; * check if the diarist has parents and no spouse (and not married/cohab); set cphome (keep=hh_id) key=hh_id / unique ; if _iorc_=0 and dem116 not in (1,2) then cphome = 1 ; * sole parent; singpar = 0; * check if the diarist has parents and no spouse (and not married/cohab); set single1parent (keep=hh_id) key=hh_id / unique ; if _iorc_=0 then singpar = 1 ; relrefp = 1; if dem113 eq 0 then relrefp = 1; if dem113 eq 1 then relrefp = 2; if dem113 eq 21 then relrefp = 3; if dem113 eq 11 then relrefp = 4; if dem113 eq 30 then relrefp = 5; * this include in-law, ie relrefp = 8; if dem113 eq 22 then relrefp = 6; if dem113 eq 12 then relrefp = 7; if dem113 eq 40 then relrefp = 9; if dem113 eq 13 then relrefp = 9; if dem113 eq 23 then relrefp = 9; if dem113 eq 50 then relrefp = 10; * if someone put down non-relative but live alone, then s/he should be the person 1; if hhtype = 1 and dem113 = 50 then relrefp = 1; civstat = 2; if dem116 in (1, 2) then civstat = 1; if partid = -7 and civstat = 1 then partid = -8; * adjusting hhtype; if hhtype = 4 and civstat = 1 then do; if hhldsize = 2 then hhtype = 2; else hhtype = 3; end; cohab = 0; if dem116 eq 2 and hhldsize ne 1 and age ge 18 then cohab = 1; * citizenship information is not available; citizen = -9; * employment status; * dem137 is hours worked: =98 if declined and =99 if n/a (p15 of 2170_mrdoc_pdf_a2170uab.pdf); * if the hours worked >= 40 is considered as full-time; empstat = 4; if dem137 ge 40 and dem137 lt 98 and dem136 eq 1 then empstat = 1; if dem137 ne 0 and dem137 lt 40 and dem136 eq 1 then empstat = 2; if empstat ne 1 and empstat ne 2 and dem136 eq 1 then empstat = 3; emp = 0; if empstat in (1,2,3) then emp = 1; unemp = 0; if dem136 eq 4 then unemp = 1; student = 0; if dem136 eq 6 then student = 1; if age lt 16 then student = 1; retired = 0; if dem136 eq 8 then retired = 1; * if age > 65 and 0 < workhrs < 30; if age gt 65 and dem137 gt 0 and dem137 lt 30 then retired = 1; workhrs = dem137; if dem137 eq 98 then workhrs = -8; if dem137 eq 99 then workhrs = -8; * here we use the mid-point of annual salary (in GBP) / 12; * for the > 20000, we assumed 30000/12; empinclm = -8; if dem52 eq 5 then empinclm = 1000/12; if dem52 eq 2 then empinclm = 2500/12; if dem52 eq 11 then empinclm = 3500/12; if dem52 eq 6 then empinclm = 4500/12; if dem52 eq 10 then empinclm = 5500/12; if dem52 eq 8 then empinclm = 6500/12; if dem52 eq 1 then empinclm = 7500/12; if dem52 eq 9 then empinclm = 8500/12; if dem52 eq 4 then empinclm = 9500/12; if dem52 eq 7 then empinclm = 10500/12; if dem52 eq 12 then empinclm = 11500/12; if dem52 eq 3 then empinclm = 30000/12; * occupation code, dem145 The data is downloaded from this site http://www.camsis.stir.ac.uk/Data/Britain91.html#Occupational_Classification Here below is the description from the site. Occupational Classification The British 1991 CAMSIS version was constructed using the Standard Occupational Classification in use for the 1991 Census (SOC90). The data source, however, was the 1991 census Sample of Anonymised Respondents 1% Household (SARS) and it should be noted that the occupational schema used for the SARS differs slightly from the 1990 SOC as a result of the aggregation of a few sparse occupations. The table below shows the link between units of the SOC and SAR schema. This table also shows the link between SOC90 and occupational categories used in the 1981 Census. Although there is not a one-to-one correspondence between the occupation unit groups (OUGs) of the SOC and the 1981 Occupational Classification, the match is quite close (despite the difference in the way that the categories are ordered). The table below, ordered by 1980 Operational Code, shows which 1980 group has been matched to an SOC unit group. The 1980 classification is unusual in distinguishing numerous separate OUGs for supervisors. Where several SOC groups make up a 1980 OUG, the score for the latter is based on a weighted combination. CAMSIS scores for ISCO-88 occupational categories were derived from SOC90 by translation. Details of the linkage file and other files relevant to occupational data can be found on the CAMSIS page dealing with occupational classifications. The column 1980 Oper. Code should be applied to all SOC under the same SOC, SAR, and OUG, and not just the SOC, SAR, and OUG pairs. ; occup = -8; if dem145 eq 0 then occup = -8 ; if dem145 eq 10 then occup = -8 ; if dem145 eq 21 then occup = -8 ; if dem145 eq 22 then occup = -8 ; if dem145 eq 23 then occup = -8 ; if dem145 eq 26 then occup = -8 ; if dem145 eq 32 then occup = -8 ; if dem145 eq 51 then occup = -8 ; if dem145 eq 54 then occup = -8 ; if dem145 eq 72 then occup = -8 ; if dem145 eq 93 then occup = -8 ; if dem145 eq 94 then occup = -8 ; if dem145 eq 96 then occup = -8 ; if dem145 eq 441 then occup = -8 ; if dem145 eq 935 then occup = -8 ; if dem145 eq 1030 then occup = -8 ; if dem145 eq 1144 then occup = -8 ; if dem145 eq 9991 then occup = -8 ; if dem145 eq 9992 then occup = -8 ; if dem145 eq 10712 then occup = -8 ; if dem145 eq 13811 then occup = -8 ; if dem145 eq 13912 then occup = -8 ; if dem145 eq 211 then occup = 11 ; if dem145 eq 830 then occup = 12 ; if dem145 eq 1612 then occup = 14 ; if dem145 eq 676 then occup = 8 ; if dem145 eq 701 then occup = 8 ; if dem145 eq 311 then occup = 3 ; if dem145 eq 331 then occup = 9 ; if dem145 eq 201 then occup = 11 ; if dem145 eq 1317 then occup = 13 ; if dem145 eq 1341 then occup = 13 ; if dem145 eq 1351 then occup = 13 ; if dem145 eq 1353 then occup = 13 ; if dem145 eq 190 then occup = 11 ; if dem145 eq 901 then occup = 14 ; if dem145 eq 985 then occup = 14 ; if dem145 eq 633 then occup = 14 ; if dem145 eq 652 then occup = 14 ; if dem145 eq 241 then occup = 3 ; if dem145 eq 932 then occup = 14 ; if dem145 eq 755 then occup = 9 ; if dem145 eq 1114 then occup = 13 ; if dem145 eq 1124 then occup = 13 ; if dem145 eq 1391 then occup = 13 ; if dem145 eq 1392 then occup = 13 ; if dem145 eq 1401 then occup = 13 ; if dem145 eq 1402 then occup = 13 ; if dem145 eq 1406 then occup = 13 ; if dem145 eq 313 then occup = 3 ; if dem145 eq 1521 then occup = 13 ; if dem145 eq 902 then occup = 13 ; if dem145 eq 921 then occup = 13 ; if dem145 eq 711 then occup = 13 ; if dem145 eq 721 then occup = 13 ; if dem145 eq 1041 then occup = 13 ; if dem145 eq 1051 then occup = 13 ; if dem145 eq 1122 then occup = 13 ; if dem145 eq 631 then occup = 13 ; if dem145 eq 640 then occup = 13 ; if dem145 eq 890 then occup = 13 ; if dem145 eq 175 then occup = 13 ; if dem145 eq 454 then occup = 4 ; if dem145 eq 463 then occup = 4 ; if dem145 eq 451 then occup = 4 ; if dem145 eq 25 then occup = 3 ; if dem145 eq 250 then occup = 3 ; if dem145 eq 140 then occup = 7 ; if dem145 eq 1597 then occup = 13 ; if dem145 eq 1207 then occup = 3 ; if dem145 eq 482 then occup = 9 ; if dem145 eq 500 then occup = 9 ; if dem145 eq 372 then occup = 9 ; if dem145 eq 1462 then occup = 9 ; if dem145 eq 634 then occup = 9 ; if dem145 eq 661 then occup = 9 ; if dem145 eq 1552 then occup = 13 ; if dem145 eq 1079 then occup = 13 ; if dem145 eq 152 then occup = 9 ; if dem145 eq 262 then occup = 3 ; if dem145 eq 681 then occup = 14 ; if dem145 eq 290 then occup = 14 ; if dem145 eq 1522 then occup = 14 ; if dem145 eq 683 then occup = 8 ; if dem145 eq 1202 then occup = 13 ; if dem145 eq 1212 then occup = 13 ; if dem145 eq 272 then occup = 3 ; if dem145 eq 302 then occup = 3 ; if dem145 eq 395 then occup = 1 ; if dem145 eq 1440 then occup = 13 ; if dem145 eq 1450 then occup = 13 ; if dem145 eq 400 then occup = 12 ; if dem145 eq 761 then occup = 12 ; if dem145 eq 770 then occup = 12 ; if dem145 eq 80 then occup = 12 ; if dem145 eq 800 then occup = 12 ; if dem145 eq 1543 then occup = 13 ; if dem145 eq 1553 then occup = 13 ; if dem145 eq 763 then occup = 12 ; if dem145 eq 782 then occup = 12 ; if dem145 eq 443 then occup = 14 ; if dem145 eq 740 then occup = 14 ; if dem145 eq 102 then occup = 8 ; if dem145 eq 702 then occup = 8 ; if dem145 eq 1361 then occup = 3 ; if dem145 eq 1371 then occup = 3 ; if dem145 eq 1382 then occup = 3 ; if dem145 eq 662 then occup = 14 ; if dem145 eq 301 then occup = 9 ; if dem145 eq 1606 then occup = 13 ; if dem145 eq 753 then occup = 14 ; if dem145 eq 752 then occup = 14 ; if dem145 eq 384 then occup = 1 ; if dem145 eq 382 then occup = 12 ; if dem145 eq 361 then occup = 3 ; if dem145 eq 364 then occup = 1 ; if dem145 eq 572 then occup = 9 ; if dem145 eq 1431 then occup = 13 ; if dem145 eq 1541 then occup = 13 ; if dem145 eq 1551 then occup = 13 ; if dem145 eq 151 then occup = 6 ; if dem145 eq 172 then occup = 6 ; if dem145 eq 481 then occup = 6 ; if dem145 eq 492 then occup = 6 ; if dem145 eq 181 then occup = 6 ; if dem145 eq 232 then occup = 7 ; if dem145 eq 532 then occup = 11 ; if dem145 eq 1101 then occup = 3 ; if dem145 eq 1243 then occup = 3 ; if dem145 eq 1132 then occup = 3 ; if dem145 eq 1170 then occup = 3 ; if dem145 eq 1145 then occup = 3 ; if dem145 eq 1181 then occup = 3 ; if dem145 eq 580 then occup = 10 ; if dem145 eq 682 then occup = 6 ; if dem145 eq 160 then occup = 6 ; if dem145 eq 176 then occup = 6 ; if dem145 eq 41 then occup = 10 ; if dem145 eq 410 then occup = 10 ; if dem145 eq 42 then occup = 10 ; if dem145 eq 1314 then occup = 13 ; if dem145 eq 173 then occup = 6 ; if dem145 eq 991 then occup = 14 ; if dem145 eq 1432 then occup = 13 ; if dem145 eq 989 then occup = 13 ; if dem145 eq 672 then occup = 8 ; if dem145 eq 756 then occup = 8 ; if dem145 eq 1405 then occup = 13 ; if dem145 eq 987 then occup = 14 ; if dem145 eq 1601 then occup = 13 ; if dem145 eq 1604 then occup = 13 ; if dem145 eq 1608 then occup = 13 ; if dem145 eq 1123 then occup = 13 ; if dem145 eq 444 then occup = 12 ; if dem145 eq 1313 then occup = 13 ; if dem145 eq 576 then occup = 11 ; if dem145 eq 623 then occup = 10 ; if dem145 eq 184 then occup = 5 ; if dem145 eq 984 then occup = 13 ; if dem145 eq 1582 then occup = 13 ; if dem145 eq 1372 then occup = 14 ; if dem145 eq 1334 then occup = 14 ; if dem145 eq 941 then occup = 14 ; if dem145 eq 553 then occup = 13 ; if dem145 eq 222 then occup = 9 ; if dem145 eq 243 then occup = 3 ; if dem145 eq 174 then occup = 6 ; if dem145 eq 955 then occup = 13 ; if dem145 eq 972 then occup = 13 ; if dem145 eq 132 then occup = 11 ; if dem145 eq 1250 then occup = 3 ; if dem145 eq 611 then occup = 10 ; if dem145 eq 531 then occup = 11 ; if dem145 eq 1142 then occup = 7 ; if dem145 eq 1162 then occup = 7 ; if dem145 eq 993 then occup = 13 ; if dem145 eq 1003 then occup = 13 ; if dem145 eq 282 then occup = 3 ; if dem145 eq 1211 then occup = 13 ; if dem145 eq 286 then occup = 1 ; if dem145 eq 340 then occup = 1 ; if dem145 eq 231 then occup = 7 ; if dem145 eq 123 then occup = 6 ; if dem145 eq 312 then occup = 3 ; if dem145 eq 484 then occup = 11 ; if dem145 eq 513 then occup = 11 ; if dem145 eq 1501 then occup = 13 ; if dem145 eq 1503 then occup = 13 ; if dem145 eq 511 then occup = 11 ; if dem145 eq 491 then occup = 11 ; if dem145 eq 1574 then occup = 13 ; if dem145 eq 393 then occup = 11 ; if dem145 eq 470 then occup = 11 ; if dem145 eq 1412 then occup = 13 ; if dem145 eq 1413 then occup = 13 ; if dem145 eq 1394 then occup = 14 ; if dem145 eq 1404 then occup = 14 ; if dem145 eq 560 then occup = 11 ; if dem145 eq 1381 then occup = 13 ; if dem145 eq 541 then occup = 11 ; if dem145 eq 551 then occup = 11 ; if dem145 eq 1272 then occup = 13 ; if dem145 eq 994 then occup = 14 ; if dem145 eq 1004 then occup = 14 ; if dem145 eq 110 then occup = 5 ; if dem145 eq 604 then occup = 10 ; if dem145 eq 621 then occup = 10 ; if dem145 eq 878 then occup = 13 ; if dem145 eq 1013 then occup = 13 ; if dem145 eq 1023 then occup = 13 ; if dem145 eq 1261 then occup = 13 ; if dem145 eq 552 then occup = 11 ; if dem145 eq 323 then occup = 10 ; if dem145 eq 853 then occup = 14 ; if dem145 eq 133 then occup = 4 ; if dem145 eq 862 then occup = 13 ; if dem145 eq 872 then occup = 13 ; if dem145 eq 1244 then occup = 13 ; if dem145 eq 1271 then occup = 13 ; if dem145 eq 1572 then occup = 13 ; if dem145 eq 1561 then occup = 13 ; if dem145 eq 1571 then occup = 13 ; if dem145 eq 363 then occup = 1 ; if dem145 eq 452 then occup = 11 ; if dem145 eq 461 then occup = 11 ; if dem145 eq 1021 then occup = 14 ; if dem145 eq 1523 then occup = 14 ; if dem145 eq 575 then occup = 11 ; if dem145 eq 1204 then occup = 9 ; if dem145 eq 1221 then occup = 9 ; if dem145 eq 512 then occup = 9 ; if dem145 eq 1150 then occup = 14 ; if dem145 eq 605 then occup = 10 ; if dem145 eq 362 then occup = 1 ; if dem145 eq 1319 then occup = 13 ; if dem145 eq 101 then occup = 5 ; if dem145 eq 121 then occup = 7 ; if dem145 eq 632 then occup = 11 ; if dem145 eq 651 then occup = 11 ; if dem145 eq 874 then occup = 14 ; if dem145 eq 877 then occup = 14 ; if dem145 eq 875 then occup = 14 ; if dem145 eq 1246 then occup = 13 ; if dem145 eq 1280 then occup = 13 ; if dem145 eq 712 then occup = 14 ; if dem145 eq 722 then occup = 14 ; if dem145 eq 1060 then occup = 13 ; * self-employed; if dem143 eq 2 then occup = 14; if occup eq -8 and (age lt 20 or empstat eq 4) then occup = -7; * sector of employment, dem144 = 6 if others, =8 if declined, =9 if n/a; * p17 of 2170_mrdoc_pdf_a2170uab.pdf; sector = -8; if dem144 eq 1 or dem144 eq 2 or dem144 eq 3 or dem144 eq 4 then sector = 1; if dem144 eq 5 then sector = 2; * there is no single education level variable; * therefore, we have created this conversion table; * if there is any inconsistency, the highest education level is assumed; * Here are the codes in ascending order 1 = no qualification 2 = CSE 2-5 3 = CSE 1 / GCE O 4 = ONC 5 = HNC 6 = City & Guilds 7 = Full appenticeship 8 = GCE A 9 = Teacher training 10 = Nursing qualification 11 = Other professional qualification/diploma 12 = Degree ; * we have used the following guideline on ISCED from wikipedia; * ISCED defined levels of education Level Description Level 0 Pre-Primary Education Level 1 Primary Education or First Stage of Basic Education Level 2 Lower Secondary or Second Stage of Basic Education Level 3 (Upper) Secondary Education Level 4 Post-Secondary Non-Tertiary Education Level 5 First Stage of Tertiary Education (Not leading directly to an advanced research qualification) Level 6 Second Stage of Tertiary Education (Leading to an advanced research qualification) ; educa = -8; if dem119 eq 9 then educa = 1; if dem120 eq 1 then educa = 2; if dem121 eq 2 then educa = 3; if dem124 eq 5 then educa = 4; if dem125 eq 6 then educa = 5; if dem126 eq 7 then educa = 6; if dem127 eq 1 then educa = 7; if dem122 eq 3 then educa = 8; if dem128 eq 2 then educa = 9; if dem129 eq 3 then educa = 10; if dem130 eq 4 then educa = 11; if dem123 eq 4 then educa = 12; edtry = -8; if educa gt 0 and educa le 3 then edtry = 1; if educa ge 4 and educa le 8 then edtry = 2; if educa ge 9 then edtry = 3; * rush is not available; rushed = -9; * workhrs and sector adjustment; if (workhrs = 0 or sector = -8) and empstat = 4 then do; workhrs = -7; sector = -7; end; if educa = 12 and aglftsch = 19 and age > 21 then aglftsch = 21; if educa in (9,10,11) and aglftsch = 19 and age > 20 then aglftsch = 20; drop dem1-dem204; run; *** put the index to the AV and main code lookup table; data uksas.tbl_lu_avcode (index=(actv_cd)); set uksas.tbl_lu_avcode; run; data uksas.tbl_lu_maincode (index=(actv_cd)); set uksas.tbl_lu_maincode; run; *** create a table with the diary and 96 timeslot data; data all ; set uksas.bigdat (obs=max); hh_id = compress(put(dem1,z4.0))||'_'||compress(put(dem2,z4.0)); if dem105 ge 9 then diary_start_dt = mdy(dem105, dem104,1983); if dem105 lt 9 then diary_start_dt = mdy(dem105, dem104,1984); format diary_start_dt date9. ; start_day = weekday(diary_start_dt) ; array act1 {*} sua1-sua96 moa1-moa96 tua1-tua96 wea1-wea96 tha1-tha96 fra1-fra96 saa1-saa96 ; array act2 {*} sub1-sub96 mob1-mob96 tub1-tub96 web1-web96 thb1-thb96 frb1-frb96 sab1-sab96 ; array cop1 {*} suwa1-suwa96 mowa1-mowa96 tuwa1-tuwa96 wewa1-wewa96 thwa1-thwa96 frwa1-frwa96 sawa1-sawa96 ; array cop2 {*} suwb1-suwb96 mowb1-mowb96 tuwb1-tuwb96 wewb1-wewb96 thwb1-thwb96 frwb1-frwb96 sawb1-sawb96 ; array cop3 {*} suwc1-suwc96 mowc1-mowc96 tuwc1-tuwc96 wewc1-wewc96 thwc1-thwc96 frwc1-frwc96 sawc1-sawc96 ; array cop4 {*} suwd1-suwd96 mowd1-mowd96 tuwd1-tuwd96 wewd1-wewd96 thwd1-thwd96 frwd1-frwd96 sawd1-sawd96 ; array loc {*} sul1-sul96 mol1-mol96 tul1-tul96 wel1-wel96 thl1-thl96 frl1-frl96 sal1-sal96 ; do i = 1 to 672 ; day_of_week = floor((i-1)/96) + 1 ; day = mod(day_of_week + 7 - start_day,7) +1; time_slot = mod(i-1,96) + 1; activity1 = act1{i} ; activity2 = act2{i} ; co_person1 = cop1{i} ; co_person2 = cop2{i} ; co_person3 = cop3{i} ; co_person4 = cop4{i} ; location = loc{i} ; * assign AV value; actv_cd = activity1; set uksas.tbl_lu_avcode (keep=av_cd actv_cd) key=actv_cd / unique ; if _iorc_ = 0 then av = av_cd ; if _iorc_ ne 0 then av = 41 ; * assign main value; actv_cd = activity1; set uksas.tbl_lu_maincode (keep=main_cd actv_cd) key=actv_cd / unique ; if _iorc_ = 0 then do; main = main_cd ; if actv_cd = 1502 and location in (5,6) then main = 5; if actv_cd = 402 and location in (5) then main = 5; if actv_cd = 1501 and location not in (5,6,28) then main = 6; if actv_cd = 1502 and location not in (5,6,28) then main = 6; if actv_cd = 101 and location not in (1,2,3,4) then main = 7; if actv_cd = 104 and location not in (1,2,3,4) then main = 7; if actv_cd = 101 and location in (1,2,3,4) then main = 8; if actv_cd = 104 and location in (1,2,3,4) then main = 8; if actv_cd = 301 and location not in (1,2,3,4) then main = 9; if actv_cd = 0 and location in (5,6) then main = 13; if actv_cd = 806 and location ne 40 then main = 27; if actv_cd = 806 and location = 40 then main = 47; end; if _iorc_ ne 0 then do; main = 69 ; if actv_cd = 0 and location ne 1 then main = 41; if actv_cd = 0 and location in (36,37,38,41,42,43,44,45) then main = 62; if actv_cd = 0 and location in (1,2,3,4) and lagactivity1 in (501,502,503,504,505) then main = 1; if actv_cd = 0 and location in (1,2,3,4) and leadactivity1 in (501,502,503,504,505) then main = 1; if actv_cd = 0 and loction ne 99 and lagactivity1 in (1601,1602,1603) then main = 3; if actv_cd = 0 and location ne 99 and leadactivity1 in (1601,1602,1603) then main = 3; if actv_cd = 103 and location in (5,6) and lagactivity1 = 103 then main = 5; if actv_cd = 103 and location in (5,6) and leadactivity1 = 103 then main = 5; end; * assign sec value; actv_cd = activity2; set uksas.tbl_lu_maincode (keep=main_cd actv_cd) key=actv_cd / unique ; if _iorc_ = 0 then do; sec = main_cd ; if actv_cd = 1502 and location in (5,6) then sec = 5; if actv_cd = 402 and location in (5) then sec = 5; if actv_cd = 1501 and location not in (5,6,28) then sec = 6; if actv_cd = 1502 and location not in (5,6,28) then sec = 6; if actv_cd = 101 and location not in (1,2,3,4) then sec = 7; if actv_cd = 104 and location not in (1,2,3,4) then sec = 7; if actv_cd = 101 and location in (1,2,3,4) then sec = 8; if actv_cd = 104 and location in (1,2,3,4) then sec = 8; if actv_cd = 301 and location not in (1,2,3,4) then sec = 9; if actv_cd = 0 and location in (5,6) then sec = 13; if actv_cd = 806 and location ne 40 then sec = 27; if actv_cd = 806 and location = 40 then sec = 47; end; if _iorc_ ne 0 then do; sec = 69 ; if actv_cd = 0 and location ne 1 then sec = 41; if actv_cd = 0 and location in (36,37,38,41,42,43,44,45) then sec = 62; if actv_cd = 0 and location in (1,2,3,4) and lagactivity2 in (501,502,503,504,505) then sec = 1; if actv_cd = 0 and location in (1,2,3,4) and leadactivity2 in (501,502,503,504,505) then sec = 1; if actv_cd = 0 and loction ne 99 and lagactivity2 in (1601,1602,1603) then sec = 3; if actv_cd = 0 and location ne 99 and leadactivity2 in (1601,1602,1603) then sec = 3; if actv_cd = 103 and location in (5,6) and lagactivity2 = 103 then sec = 5; if actv_cd = 103 and location in (5,6) and leadactivity2 = 103 then sec = 5; end; *** previous and next activities; * previous; if i in (1,97,193,289,385,481,577) then do; lagactivity1 = -8 ; lagactivity2 = -8 ; laglocation = -8 ; end; if i not in (1,97,193,289,385,481,577) then do; lagactivity1 = act1{i-1} ; lagactivity2 = act2{i-1} ; laglocation = loc{i-1} ; end; * next; if i in (96,192,288,384,480,576,672) then do; leadactivity1 = -8 ; leadactivity2 = -8 ; leadlocation = -8 ; end; if i not in (96,192,288,384,480,576,672) then do; leadactivity1 = act1{i+1} ; leadactivity2 = act2{i+1} ; leadlocation = loc{i+1} ; end; * date of the diary; format diary_dt yymmdd10.; diary_dt = intnx('day',diary_start_dt, day - 1); output ; end ; keep hh_id day_of_week day time_slot diary_dt activity1-activity2 lagactivity1-lagactivity2 leadactivity1-leadactivity2 co_person1-co_person4 location laglocation leadlocation av main sec; run ; proc sort ; by hh_id day time_slot ; run ; ******************************************** calculate episode information ********************************************; data uksas.episode; set all; by hh_id day; retain; if first.day then do; epnum = 1; tmpstart = 1; tmpend = 1; end; if first.day = 0 then do; tmpend = tmpend + 1; end; *** other ver 6.0 variables; * ict; ict = 0; if activity1 in (207,209,818,3302,4004,4009) or activity2 in (207,209,818,3302,4004,4009) then ict = 1; * inout; * inout based on main activities and not location code; inout = 1; if activity1 in (0,9999) and location = 0 then inout = -8; if activity1 in (501,502,503,504,505 ,1201,1202,1203,1204 ,1701,1702,1703,1704,1705,1706) then inout = 3; if activity1 in (705,804,901 ,1801,1802,1803,1804,1805,1806 ,1901,1902,1903,1904,1905,1906,1907,1908,1909,1910,1911,1912,1913,1914,1915 ,2001,2101,2102) then inout = 2; * pickup the missing inout with location code; if inout = -8 then do; if location ge 1 and location le 29 then inout = 1; if (location ge 30 and location le 35) or location in (39,40) then inout = 2; if location ge 36 and location le 45 and location not in (39,40) then inout = 3; end; * mtrav; mtrav = -8; if location eq 0 and inout eq 3 then mtrav = 5; if location eq 0 and inout eq 2 then mtrav = 3; if location eq 0 and inout eq 1 then mtrav = -7; if location in (36) and inout eq 3 then mtrav = 1; if location in (37,38,41,42) and inout eq 3 then mtrav = 2; if location in (30,31,32,33,34,35,40) and inout eq 2 then mtrav = 3; if location in (39) and inout eq 2 then mtrav = 4; if location in (43,44,45) and inout eq 3 then mtrav = 5; if location ne 0 and mtrav not in (1,2,3,4,5,-7) then do; if inout eq 1 then mtrav = -7; end; if mtrav = -8 then do; if inout = 2 then mtrav = -7; end; * eloc; eloc = 9; if location = 1 and inout eq 1 then eloc = 1; else if location in (2,3,4) and inout eq 1 then eloc = 2; else if location in (5,6) and inout eq 1 then eloc = 3; else if location in (17) and inout eq 1 then eloc = 4; else if location in (7,8,9,10,11,12,13,14) and inout eq 1 then eloc = 5; else if location in (23,28) and inout eq 1 then eloc = 6; else if location in (18) and inout eq 1 then eloc = 7; else if location in (36,37,38,39,40,41,42,43,44,45) and inout eq 3 then eloc = 8; else if inout eq -8 then eloc = -8; * other people variables; alone = 0; if co_person1 = 0 and co_person2 = 0 and co_person3 = 0 and co_person4 = 0 then alone = 1; child = 0; if co_person1 in (2,3,8,11,12) or co_person2 in (2,3,8,11,12) or co_person3 in (2,3,8,11,12) or co_person4 in (2,3,8,11,12) then child = 1; sppart = 0; if co_person1 in (1,10) or co_person2 in (1,10) or co_person3 in (1,10) or co_person4 in (1,10) then sppart = 1; oad = 0; if co_person1 in (1,4,7,10,13,21,22,24,25,26,27,28,29,30,31,32,33,35) or co_person2 in (1,4,7,10,13,21,22,24,25,26,27,28,29,30,31,32,33,35) or co_person3 in (1,4,7,10,13,21,22,24,25,26,27,28,29,30,31,32,33,35) or co_person4 in (1,4,7,10,13,21,22,24,25,26,27,28,29,30,31,32,33,35) then oad = 1; * check for episode change; format clockst z5.2; if leadactivity1 ne activity1 or leadactivity2 ne activity2 or leadlocation ne location then do; * output result; start = (tmpstart-1) * 15 + 1; end = tmpend * 15; time = end - start + 1; if mod(start-1, 60) eq 0 then clockst = (start-1)/60; else clockst = int((start-1)/60) + 0.6 * ((start-1)/60 - int((start-1)/60)); * adjusting the clock starting time; if clockst ge 20 then clockst = clockst - 20; else clockst = clockst + 4; if sppart = 1 then sppart = time; output; epnum = epnum + 1; tmpstart = tmpend + 1; end; drop tmpstart tmpend time_slot day_of_week activity1-activity2 lagactivity1-lagactivity2 leadactivity1-leadactivity2 co_person1-co_person4 location laglocation leadlocation; run; ********************************************************************************* Create ver 5.3, ver 5.8 and ver 6.0 *********************************************************************************; proc sql; create table ver53 as select hh_id, day, av, sum(time) as av_time_am from uksas.episode group by hh_id, day, av order by hh_id, day, av ; create table main as select hh_id, day, main, sum(time) as main_time_am from uksas.episode group by hh_id, day, main order by hh_id, day, main ; create table sppart as select hh_id, day, sum(sppart) as sppart from uksas.episode group by hh_id, day order by hh_id, day ; quit; *** ver 5.3; data ver53; set ver53; avvar = compress('av'||put(av,2.0)); run; proc transpose data = ver53 out = ver53transpose (drop = _name_); by hh_id day; var av_time_am; id avvar; idlabel avvar; run; data uksas.ver53; set ver53transpose; array av {*} av1-av41; do i = 1 to 41; if av{i} = . then av{i} = 0; end; drop i; run; *** ver 5.8; data main; set main; mainvar = compress('main'||put(main,2.0)); run; proc transpose data = main out = maintranspose (drop = _name_); by hh_id day; var main_time_am; id mainvar; idlabel mainvar; run; data uksas.ver58 ; merge uksas.ver53 (in=a) maintranspose (in=b) sppart (in=c); by hh_id day; if a and b; array main {*} main1-main69; do i = 1 to 69; if main{i} = . then main{i} = 0; end; if sppart = 0 then sppart = -7; drop i; run; ********************************************************************** extract information from activities to back-fill some variables **********************************************************************; * create an unique activity list for each diarist; proc sql; create table tmpactv as select distinct hh_id, main, sec, av, sppart from uksas.episode order by 1 ; quit; * create individual category for each item; proc sql; create table student as select distinct hh_id from tmpactv where main = 16 order by 1 ; create table emp as select distinct hh_id from tmpactv where main in (7,8,9) order by 1 ; create table partner as select distinct hh_id from tmpactv where sppart ne 0 order by 1 ; create table adultcare as select distinct hh_id from tmpactv where main = 32 order by 1 ; create table spemp as select distinct a.hh_id as spouse, a.spouse as hh_id, a.empstat as spemp from uksas.family_stru_out a join uksas.family_stru_out b on a.spouse = b.hh_id order by a.spouse ; create table spemp2 as select distinct a.hh_id , spouse, spemp, b.hh_id as sp_hh_id from spemp a left join emp b on a.spouse = b.hh_id order by a.hh_id ; quit; *** create a final demographic dataset; data uksas.final_demography; merge uksas.family_stru_out (in=a) student (in=b) emp (in=c) partner (in=d) adultcare (in=e) spemp (in=f); by hh_id; if a; * student; *if b then student = 1; * paid work; if c then do; if unemp = 1 or student = 1 or retired = 1 then do; empstat = 3; emp = 1; end; end; * adult care; carer = 0; if e then carer = 1; * spouse work; empsp = -7; if f then do; empsp = spemp; if empsp = 4 then empsp = 3; end; if civstat = 2 then empsp = -7; drop spemp; run; ********************************************************************************* determine bad cases *********************************************************************************; proc sort data = uksas.episode; by hh_id day start; run; *** pick up age and sex; proc sort data = uksas.family_stru_out (keep=hh_id sex age) out = agesex; by hh_id; run; *** pick up information from activity; data basicact; set uksas.episode; * basic activity; act1 = 0; * sleep ; act2 = 0; * eat ; act3 = 0; * personal care ; act4 = 0; * travel ; if main in (2,3) then act1 = 1; if main in (5,6,18,19,39) or eloc = 6 then act2 = 1; if main in (1,4,20,21,25) then act3 = 1; if main in (11,43,44,45,46,47,62,63,64,65,66,67) or eloc ne 1 then act4 = 1; missing = 0; if main in (0,69) then missing = 1; keep hh_id day act1-act4 missing epnum; run; proc sql; create table tmpbasicact as select a.hh_id ,day ,max(act1) as act1 ,max(act2) as act2 ,max(act3) as act3 ,max(act4) as act4 ,max(epnum) as maxep ,sum(missing) as maxmiss ,max(sex) as sex ,max(age) as age from basicact a, agesex b where a.hh_id = b.hh_id group by a.hh_id, day order by a.hh_id, day ; quit; data uksas.badcase (index=(hh_id day)); merge tmpbasicact (in=a) adultcare (in=b); by hh_id; if a; badcase = 0; sumact = act1 + act2 + act3 + act4; if sex eq -8 or age in (0,-8) then badcase = 1; if maxmiss gt 6 then badcase = 3; if maxep lt 7 then badcase = 4; if sumact le 2 then badcase = 5; if (sex eq -8 or age in (0,-8)) and maxmiss gt 6 then badcase = 7; if (sex eq -8 or age in (0,-8)) and maxep lt 7 then badcase = 8; if sex eq -8 and sumact le 2 then badcase = 9; if maxmiss gt 6 and maxep lt 7 then badcase = 13; if maxmiss gt 6 and sumact le 2 then badcase = 14; if maxep lt 7 and sumact le 2 then badcase = 15; if (sex eq -8 or age in (0,-8)) and maxmiss gt 6 and maxep lt 7 then badcase = 19; if (sex eq -8 or age in (0,-8)) and maxmiss gt 6 and sumact le 2 then badcase = 20; if (sex eq -8 or age in (0,-8)) and sumact le 2 and maxep lt 7 then badcase = 21; if maxmiss gt 6 and maxep lt 7 and sumact le 2 then badcase = 25; if (sex eq -8 or age in (0,-8)) and maxmiss gt 6 and sumact le 2 and maxep lt 7 then badcase = 29; *** exception; if act4 = 0 and sum(of act1-act3) > 0 and sex in (1,2) then badcase = 0; if b and sex in (1,2) then badcase = 0; if sumact = 2 and maxep ge 15 and sex in (1,2) then badcase = 0; if act3 = 0 and (act1 + act2 + act4) > 1 and sex in (1,2) then badcase = 0; keep hh_id badcase day ; run; /* Sex and age of 1985 UK Source: Population Division of the Department of Economic and Social Affairs of the United Nations Secretariat, World Population Prospects: The 2008 Revision, http://esa.un.org/unpp, Saturday, October 02, 2010; 4:00:23 AM. Row Labels Female Male Grand Total 0-4 1759 1846 3605 5-9 1653 1742 3395 10-14 1888 1986 3874 15-19 2218 2310 4528 20-24 2331 2382 4713 25-29 2033 2060 4093 30-34 1871 1897 3768 35-39 2069 2072 4141 40-44 1700 1719 3419 45-49 1579 1586 3165 50-54 1525 1515 3040 55-59 1576 1508 3084 60-64 1657 1496 3153 65-69 1396 1152 2548 70-74 1387 1018 2405 75-79 1134 689 1823 80-84 760 352 1112 85-89 372 120 492 90-94 128 32 160 95-99 28 6 34 100+ 3 0 3 Grand Total 29067 27488 56555 */ * create format for age; proc format; value agefmt 0-14 = '1' 15-19 = '2' 20-24 = '3' 25-29 = '4' 30-34 = '5' 35-39 = '6' 40-44 = '7' 45-49 = '8' 50-54 = '9' 55-59 = '10' 60-64 = '11' 65-69 = '12' 70-74 = '13' 75-79 = '14' 80-100 = '15' ; * actual sex and age distribution; data agesex; set uksas.final_demography; agegp = put(age, agefmt.); keep hh_id agegp sex; run; proc sql; create table agesex as select agegp, sex, count(distinct hh_id) as n from agesex group by 1,2 order by 1,2 ; quit; * actual age and sex distribution, excl unknown sex Row Labels 1 2 Grand Total 1 53 63 116 2 171 266 437 3 115 187 302 4 113 147 260 5 101 151 252 6 120 229 349 7 124 139 263 8 116 134 250 9 88 110 198 10 92 86 178 11 72 97 169 12 69 84 153 13 65 61 126 14 29 34 63 15 20 11 31 Grand Total 1348 1799 3147 ; ** combining diaries across days; proc sort data=uksas.badcase out = badcase; by hh_id day; run; *** start the weighting process; *** step 1; data tmpstep1; set uksas.final_demography; wgt = 1; if put(age, agefmt.) = '1' and sex = 1 then wgt = 8.04897887012643 ; if put(age, agefmt.) = '2' and sex = 1 then wgt = 1.07218636725312 ; if put(age, agefmt.) = '3' and sex = 1 then wgt = 1.16818658744285 ; if put(age, agefmt.) = '4' and sex = 1 then wgt = 0.939374339836861 ; if put(age, agefmt.) = '5' and sex = 1 then wgt = 0.966815229318675 ; if put(age, agefmt.) = '6' and sex = 1 then wgt = 0.944846423369891 ; if put(age, agefmt.) = '7' and sex = 1 then wgt = 0.930852709751569 ; if put(age, agefmt.) = '8' and sex = 1 then wgt = 0.93690775672917 ; if put(age, agefmt.) = '9' and sex = 1 then wgt = 1.12509946070197 ; if put(age, agefmt.) = '10' and sex = 1 then wgt = 0.890830326070358 ; if put(age, agefmt.) = '11' and sex = 1 then wgt = 0.972115639642826 ; if put(age, agefmt.) = '12' and sex = 1 then wgt = 0.855521174078331 ; if put(age, agefmt.) = '13' and sex = 1 then wgt = 0.735007220110217 ; if put(age, agefmt.) = '14' and sex = 1 then wgt = 0.942566388564462 ; if put(age, agefmt.) = '15' and sex = 1 then wgt = 1.32561223587658 ; if put(age, agefmt.) = '1' and sex = 2 then wgt = 7.65331683022427 ; if put(age, agefmt.) = '2' and sex = 2 then wgt = 0.640568178469337 ; if put(age, agefmt.) = '3' and sex = 2 then wgt = 0.712803257630521 ; if put(age, agefmt.) = '4' and sex = 2 then wgt = 0.668892945032784 ; if put(age, agefmt.) = '5' and sex = 2 then wgt = 0.572138522104979 ; if put(age, agefmt.) = '6' and sex = 2 then wgt = 0.497946934645723 ; if put(age, agefmt.) = '7' and sex = 2 then wgt = 0.761846112572747 ; if put(age, agefmt.) = '8' and sex = 2 then wgt = 0.720034991554565 ; if put(age, agefmt.) = '9' and sex = 2 then wgt = 0.70782866236407 ; if put(age, agefmt.) = '10' and sex = 2 then wgt = 0.910311496183656 ; if put(age, agefmt.) = '11' and sex = 2 then wgt = 0.769096454778534 ; if put(age, agefmt.) = '12' and sex = 2 then wgt = 0.659734289227703 ; if put(age, agefmt.) = '13' and sex = 2 then wgt = 0.838405914802014 ; if put(age, agefmt.) = '14' and sex = 2 then wgt = 1.1790151180267 ; if put(age, agefmt.) = '15' and sex = 2 then wgt = 4.79374060648926 ; agegp = put(age, agefmt.); if sex not in (1,2) then wgt = 1; keep hh_id wgt agegp sex ocombwt; run; * 1480 unique diarists; proc sort data=tmpstep1 nodupkey; by hh_id wgt agegp sex; run; *** step 2; * calculate good diary; proc sql; create table tmpstep2 as select count(*) as nr, sum(case when badcase = 0 then 1 else 0 end) as goodnum from badcase ; quit; *** step 3; * find out the badcase for each hh_id by day; proc sql; create table tmpbadnr as select distinct a.hh_id, day as dayid, wgt, nr, goodnum, badcase, a.sex, agegp, ocombwt from tmpstep1 a, badcase b, tmpstep2 c where a.hh_id = b.hh_id and a.hh_id = b.hh_id order by hh_id, day ; create table totgoodwgt as select sum(wgt) as totgoodwgt, sum(ocombwt) as totgoodocomb from tmpbadnr where badcase = 0 ; create table tmpbadnr as select distinct * from tmpbadnr, totgoodwgt ; quit; data tmpstep3; set tmpbadnr ; if badcase ne 0 then intwgt = 0; if badcase eq 0 then intwgt = wgt * nr / totgoodwgt; if badcase ne 0 then ocombintwgt = 0; if badcase eq 0 then ocombintwgt = ocombwt * nr / totgoodocomb; * tid is an unique diary ID, for counting purpose; tid = _n_; run; *** step 4; proc sql; create table tmpstep4 as select agegp, sex, sum(intwgt) as asewt, sum(ocombintwgt) as oasewt from tmpstep3 group by 1,2 ; quit; *** step 5; proc sql; create table tmpstep5 as select agegp, sex, dayid, sum(intwgt) as asedaywt, sum(ocombintwgt) as oasedaywt, count(distinct tid) as asedaycase from tmpstep3 group by 1,2,3 order by 1,2,3 ; quit; *** step 6; data tmpstep6; set tmpstep4; esw = asewt/7; oesw = oasewt/7; run; *** step 7; proc sql; create table tmpstep7 as select distinct * from tmpstep3 a, tmpstep5 b, tmpstep4 c, tmpstep6 d where a.agegp = b.agegp and a.agegp = c.agegp and a.agegp = d.agegp and a.sex = b.sex and a.sex = c.sex and a.sex = d.sex and a.dayid = b.dayid order by a.hh_id, a.dayid ; quit; data tmpstep7; set tmpstep7; propwt = intwgt * (esw/asedaywt) ; opropwt = ocombintwgt * (oesw/oasedaywt) ; if propwt eq . then propwt = 0; if opropwt eq . then opropwt = 0; run; proc means data = tmpstep7 n sum mean; title 'Checking propwt - overall'; var propwt wgt intwgt opropwt ocombwt ocombintwgt; run; proc means data = tmpstep7 n sum mean; title 'Checking propwt - by day'; var propwt wgt intwgt opropwt ocombwt ocombintwgt; class dayid; run; proc freq data = badcase; tables badcase; title 'Distribution of badcase'; run; data uksas.weight (index=(hh_id)); set tmpstep7; day = dayid; keep hh_id day propwt opropwt wgt; run; * read in the schemas; * Read the dataset formats into memory; proc format cntlin = uksas.MTUSschemaW553; run; proc format cntlin = uksas.MTUSschemaW58; run; proc format cntlin = uksas.MTUSschemaW60; run; ********************************************************************* Common files *********************************************************************; proc sort data=uksas.final_demography out = final_demo; by hh_id; run; proc sort data=uksas.badcase out = final_badcase; by hh_id day; run; proc sort data=uksas.weight out = final_weight; by hh_id day; run; ********************************************************************* Ver 5.3 *********************************************************************; *** combine badcase and weight; proc sql; create table final_ver53 as select distinct * from uksas.ver53 a, final_badcase b, final_weight c where a.hh_id = b.hh_id and a.day = b.day and a.hh_id = c.hh_id and a.day = c.day order by a.hh_id, a.day ; quit; data merged_v53; merge final_ver53 (in=a) final_demo (in=b); by hh_id; wkday = weekday(diary_start_dt); if day = wkday then diary = 1; if day > wkday then diary = day - wkday + 1; if day < wkday then diary = 7 - wkday + day + 1; id = diary; format diary_dt yymmdd10.; diary_dt = intnx('day',diary_start_dt, day - 1); month = month(diary_dt); year = year(diary_dt); cday = day(diary_dt); * setting the weight to the original survey weight; propwt = opropwt; run; proc datasets lib=work memtype = data; modify merged_v53; attrib _all_ label=' '; attrib _all_ format=; run; data uksas.uk1983w553; array fixorder countreg survey swave msamp hldid persid id parntid1 parntid2 partid day month year diary badcase hhtype hhldsize nchild agekidx agekid2 incorig income ownhome urban computer vehicle sex age famstat cphome singpar relrefp civstat cohab citizen empstat emp unemp student retired empsp workhrs empinclm occup sector educa edtry rushed health carer disab av1 av2 av3 av4 av5 av6 av7 av8 av9 av10 av11 av12 av13 av14 av15 av16 av17 av18 av19 av20 av21 av22 av23 av24 av25 av26 av27 av28 av29 av30 av31 av32 av33 av34 av35 av36 av37 av38 av39 av40 av41 ocombwt propwt; set merged_v53 uksas.schema553; if survey eq . then delete; keep countreg survey swave msamp hldid persid id parntid1 parntid2 partid day month year diary badcase hhtype hhldsize nchild agekidx agekid2 incorig income ownhome urban computer vehicle sex age famstat cphome singpar relrefp civstat cohab citizen empstat emp unemp student retired empsp workhrs empinclm occup sector educa edtry rushed health carer disab av1 av2 av3 av4 av5 av6 av7 av8 av9 av10 av11 av12 av13 av14 av15 av16 av17 av18 av19 av20 av21 av22 av23 av24 av25 av26 av27 av28 av29 av30 av31 av32 av33 av34 av35 av36 av37 av38 av39 av40 av41 ocombwt propwt; run; ********************************************************************* Ver 5.3 *********************************************************************; *** combine badcase and weight; proc sql; create table final_ver58 as select distinct * from uksas.ver58 a, final_badcase b, final_weight c where a.hh_id = b.hh_id and a.day = b.day and a.hh_id = c.hh_id and a.day = c.day order by a.hh_id, a.day ; quit; data merged_v58; merge final_ver58 (in=a) final_demo (in=b); by hh_id; wkday = weekday(diary_start_dt); if day = wkday then diary = 1; if day > wkday then diary = day - wkday + 1; if day < wkday then diary = 7 - wkday + day + 1; id = diary; format diary_dt yymmdd10.; diary_dt = intnx('day',diary_start_dt, day - 1); month = month(diary_dt); year = year(diary_dt); cday = day(diary_dt); * setting the weight to the original survey weight; propwt = opropwt; run; proc datasets lib=work memtype = data; modify merged_v58; attrib _all_ label=' '; attrib _all_ format=; run; data uksas.uk1983w58; array fixorder countreg survey swave msamp hldid persid id parntid1 parntid2 partid day month year diary badcase hhtype hhldsize nchild agekidx agekid2 incorig income ownhome urban computer vehicle sex age famstat cphome singpar relrefp civstat cohab citizen empstat emp unemp student retired empsp workhrs empinclm occup sector educa edtry rushed health carer disab main1 main2 main3 main4 main5 main6 main7 main8 main9 main10 main11 main12 main13 main14 main15 main16 main17 main18 main19 main20 main21 main22 main23 main24 main25 main26 main27 main28 main29 main30 main31 main32 main33 main34 main35 main36 main37 main38 main39 main40 main41 main42 main43 main44 main45 main46 main47 main48 main49 main50 main51 main52 main53 main54 main55 main56 main57 main58 main59 main60 main61 main62 main63 main64 main65 main66 main67 main68 main69 av1 av2 av3 av4 av5 av6 av7 av8 av9 av10 av11 av12 av13 av14 av15 av16 av17 av18 av19 av20 av21 av22 av23 av24 av25 av26 av27 av28 av29 av30 av31 av32 av33 av34 av35 av36 av37 av38 av39 av40 av41 sppart ocombwt propwt ; set merged_v58 uksas.schema58; if survey eq . then delete; keep countreg survey swave msamp hldid persid id parntid1 parntid2 partid day month year diary badcase hhtype hhldsize nchild agekidx agekid2 incorig income ownhome urban computer vehicle sex age famstat cphome singpar relrefp civstat cohab citizen empstat emp unemp student retired empsp workhrs empinclm occup sector educa edtry rushed health carer disab main1 main2 main3 main4 main5 main6 main7 main8 main9 main10 main11 main12 main13 main14 main15 main16 main17 main18 main19 main20 main21 main22 main23 main24 main25 main26 main27 main28 main29 main30 main31 main32 main33 main34 main35 main36 main37 main38 main39 main40 main41 main42 main43 main44 main45 main46 main47 main48 main49 main50 main51 main52 main53 main54 main55 main56 main57 main58 main59 main60 main61 main62 main63 main64 main65 main66 main67 main68 main69 av1 av2 av3 av4 av5 av6 av7 av8 av9 av10 av11 av12 av13 av14 av15 av16 av17 av18 av19 av20 av21 av22 av23 av24 av25 av26 av27 av28 av29 av30 av31 av32 av33 av34 av35 av36 av37 av38 av39 av40 av41 sppart ocombwt propwt ; run; ********************************************************************* Ver 6 *********************************************************************; *** combine badcase and weight; proc sql; create table final_ver6 as select distinct * from uksas.episode a, final_badcase b, final_weight c where a.hh_id = b.hh_id and a.day = b.day and a.hh_id = c.hh_id and a.day = c.day order by a.hh_id, a.day ; quit; data merged_v6; merge final_ver6 (in=a) final_demo (in=b); by hh_id; wkday = weekday(diary_start_dt); if day = wkday then diary = 1; if day > wkday then diary = day - wkday + 1; if day < wkday then diary = 7 - wkday + day + 1; id = diary; format diary_dt yymmdd10.; diary_dt = intnx('day',diary_start_dt, day - 1); month = month(diary_dt); year = year(diary_dt); cday = day(diary_dt); run; proc datasets lib=work memtype = data; modify merged_v6; attrib _all_ label=' '; attrib _all_ format=; run; data uksas.uk1983w60; array fixorder countreg survey swave msamp hldid persid id day cday month year diary badcase sex age time clockst start end epnum main sec av inout eloc ict mtrav alone child sppart oad ; set merged_v6 uksas.schema60; if survey eq . then delete;; keep countreg survey swave msamp hldid persid id day cday month year diary badcase sex age time clockst start end epnum main sec av inout eloc ict mtrav alone child sppart oad ; run; proc format; value fmteducalab 1 = 'no qualification' 2 = 'CSE 2-5' 3 = 'CSE 1 / GCE O' 4 = 'ONC' 5 = 'HNC' 6 = 'City & Guilds' 7 = 'Full appenticeship' 8 = 'GCE A' 9 = 'Teacher training' 10 = 'Nursing qualification' 11 = 'Other professional qualification/diploma' 12 = 'Degree' -8 = 'Unknown' ; value fmtincolab 5 = 'Less than 2000' 2 = '2000 - 2999' 11 = '3000 - 3999' 6 = '4000 - 4999' 10 = '5000 - 5999' 8 = '6000 - 6999' 1 = '7000 - 7999' 9 = '8000 - 9999' 4 = '10000 - 11999' 7 = '12000 - 14999' 12 = '15000 - 19999' 3 = '20000 or more' ; value fmtempinlab 5 = '1000/12' 2 = '2500/12' 11 = '3500/12' 6 = '4500/12' 10 = '5500/12' 8 = '6500/12' 1 = '7500/12' 9 = '8500/12' 4 = '9500/12' 7 = '10500/12' 12 = '11500/12' 3 = '30000/12' ; quit; ********************************************************************* demographic files *********************************************************************; proc sort data=uksas.final_demography (keep=hh_id survey hldid persid sex age region aglftsch empinclm diary_start_dt educa rename=(sex=sexd age=aged empinclm = empinlab educa=educalab)) out = final_demo; by hh_id ; run; proc sort data=uksas.demography (keep=hh_id dem52 dem119 rename=(dem52=incolab )) out = demo; by hh_id; run; proc sort data = uksas.ver53 (keep = hh_id day ) out = final_ver53 ; by hh_id day; run; data merged_v53; merge final_ver53 (in=a) final_demo (in=b); by hh_id; wkday = weekday(diary_start_dt); if day = wkday then diary = 1; if day > wkday then diary = day - wkday + 1; if day < wkday then diary = 7 - wkday + day + 1; id = diary; format diary_dt yymmdd10.; diary_dt = intnx('day',diary_start_dt, day - 1); month = month(diary_dt); year = year(diary_dt); cday = day(diary_dt); run; proc datasets lib=work memtype = data; modify merged_v53; modify final_demo ; modify demo ; attrib _all_ label=' '; attrib _all_ format=; run; *** get animal data; data animaldata ; set uksas.bigdat (obs=max); hh_id = compress(put(dem1,z4.0))||'_'||compress(put(dem2,z4.0)); if dem105 ge 9 then diary_start_dt = mdy(dem105, dem104,1983); if dem105 lt 9 then diary_start_dt = mdy(dem105, dem104,1984); format diary_start_dt date9. ; start_day = weekday(diary_start_dt) ; array act1 {*} sua1-sua96 moa1-moa96 tua1-tua96 wea1-wea96 tha1-tha96 fra1-fra96 saa1-saa96 ; array act2 {*} sub1-sub96 mob1-mob96 tub1-tub96 web1-web96 thb1-thb96 frb1-frb96 sab1-sab96 ; array cop1 {*} suwa1-suwa96 mowa1-mowa96 tuwa1-tuwa96 wewa1-wewa96 thwa1-thwa96 frwa1-frwa96 sawa1-sawa96 ; array cop2 {*} suwb1-suwb96 mowb1-mowb96 tuwb1-tuwb96 wewb1-wewb96 thwb1-thwb96 frwb1-frwb96 sawb1-sawb96 ; array cop3 {*} suwc1-suwc96 mowc1-mowc96 tuwc1-tuwc96 wewc1-wewc96 thwc1-thwc96 frwc1-frwc96 sawc1-sawc96 ; array cop4 {*} suwd1-suwd96 mowd1-mowd96 tuwd1-tuwd96 wewd1-wewd96 thwd1-thwd96 frwd1-frwd96 sawd1-sawd96 ; array loc {*} sul1-sul96 mol1-mol96 tul1-tul96 wel1-wel96 thl1-thl96 frl1-frl96 sal1-sal96 ; do i = 1 to 672 ; day_of_week = floor((i-1)/96) + 1 ; day = mod(day_of_week + 7 - start_day,7) +1; time_slot = mod(i-1,96) + 1; activity1 = act1{i} ; activity2 = act2{i} ; co_person1 = cop1{i} ; co_person2 = cop2{i} ; co_person3 = cop3{i} ; co_person4 = cop4{i} ; location = loc{i} ; * date of the diary; format diary_dt yymmdd10.; diary_dt = intnx('day',diary_start_dt, day - 1); if activity1 in (806,816,1913) or activity2 in (806,816,1913) then output; end ; keep hh_id day_of_week day time_slot diary_dt activity1 activity2 location ; run ; proc sql; create table animal as select hh_id, day, count(*) as tottime from animaldata group by hh_id, day order by hh_id, day ; quit; proc datasets lib=work memtype = data; modify animal; attrib _all_ label=' '; attrib _all_ format=; run; ********************************************************************* supplement data *********************************************************************; data supplement; merge merged_v53 (in=d) final_demo (in=a) demo (in=b) animal (in=c); by hh_id; if d; animal = tottime*15; if animal eq . then animal = -7; ethnic = -9; drop tottime; run; data uksas.supplement; array fixorder survey hldid persid id sexd aged incolab educalab empinlab region aglftsch ethnic animal; set supplement uksas.schemaUKextravar; if survey eq . then delete; format incolab fmtincolab. educalab fmteducalab. empinlab fmtempinlab.; keep survey hldid persid id sexd aged incolab educalab empinlab region aglftsch ethnic animal; run;