Vous êtes sur la page 1 sur 4

2017 Practical Methods for Secondary Data Analysis

Data Manipulation 1, March 20, 2017

/* ------------------------------------------------------------------
   Example data sets A1 to A4, used by the merge examples below.
   ID is read as a character variable ($) in every data set.
   ------------------------------------------------------------------ */

/* A1: one row per ID with weight, height (variable is spelled
   "hight" and is kept that way), age and gender.
   The rows are not in ID order, so A1 needs a PROC SORT before
   it can take part in a BY-merge. */
data A1;
  input ID $
        weight
        hight
        age
        gender $;
  datalines;
1 50 165.00 20 F
4 75 175.00 40 M
5 55 160.00 60 F
6 40 156.00 75 M
2 45 155.00 50 M
3 60 170.00 35 F
;
run;

/* A2: disease indicator (0/1) for IDs 1-5. */
data A2;
  input ID $
        disease;
  datalines;
1 1
2 0
3 0
4 0
5 1
;
run;

/* A3: a second weight value plus the disease indicator for IDs 1-5.
   Shares the variable name "weight" with A1. */
data A3;
  input ID $
        weight
        disease;
  datalines;
1 55 1
2 45 0
3 58 0
4 70 0
5 65 1
;
run;

/* A4: disease indicator; rows are out of ID order, ID 6 is absent
   and ID 7 (not present in A1) is included. */
data A4;
  input ID $
        disease;
  datalines;
1 1
2 0
3 0
5 1
4 0
7 1
;
run;

/* Slide 8: match-merge A1 and A4 on ID, keeping every ID that
   appears in either data set. Both inputs are sorted by the
   BY variable first. */
proc sort data=A1;
  by id;
run;

proc sort data=A4;
  by id;
run;

data A1A4;
  merge A1
        A4;
  by id;
run;

/* Slide 9: match-merge A1 and A4 on ID, but keep only the IDs
   that are present in A1. */
proc sort data=A1;
  by id;
run;

proc sort data=A4;
  by id;
run;

data A1A4;
  merge A1 (in=in_a1)
        A4;
  by id;
  /* in_a1 is 1 when the current ID contributed a row from A1;
     the subsetting IF drops every other row. */
  if in_a1;
run;
/* Slide 10: match-merge A1 and A4 on ID and keep only the IDs
   found in both data sets. Contains no sort steps of its own,
   so both inputs must already be in ID order. */
data A1A4;
  merge A1 (in=in_a1)
        A4 (in=in_a4);
  by id;
  /* Each flag is 1 when its data set contributed to the current ID. */
  if in_a1 and in_a4;
run;

/* Slides 11 and 13: one-to-many match-merge of A1 (one row per ID)
   with A5 (several drug rows for some IDs). */

/* A5 must exist before the slide 11 steps can sort and merge it,
   so it is created first. */
data A5;
  input ID $
        drug $;
  datalines;
1 A1
1 A2
1 A3
3 A2
4 A1
4 A3
;
run;

/* Both inputs must be in ID order for the BY-merges below. */
proc sort data=A1;
  by id;
run;

proc sort data=A5;
  by id;
run;

/* Slide 11: keep every ID from either data set. For an ID with
   several A5 rows, the A1 values are repeated on each of them. */
data A1A5;
  merge A1
        A5;
  by id;
run;

/* Slide 13: same merge, keeping only IDs that have at least one
   row in A5. This overwrites the A1A5 produced for slide 11.
   A1 and A5 are still sorted from the steps above. */
data A1A5;
  merge A1
        A5 (in=in_a5);
  by id;
  /* in_a5 is 1 when A5 contributed to the current ID. */
  if in_a5;
run;

/* Slide 15: match-merge A1 and A3 on ID. Both data sets carry a
   variable named "weight"; each is renamed on input so the A3
   value does not overwrite the A1 value. */

/* A BY-merge requires both inputs in ID order. Sort here instead
   of relying on earlier steps having left A1 sorted and on A3
   having been keyed in order. */
proc sort data=A1;
  by id;
run;

proc sort data=A3;
  by id;
run;

data A1A3;
  merge A1 (rename=(weight=weight1))
        A3 (rename=(weight=weight3));
  by id;
run;

/* Slide 17: concatenate two data sets that have the same variables. */

/* B1: IDs 1-3. */
data B1;
  input ID $
        age
        gender $;
  datalines;
1 20 F
2 50 M
3 35 F
;
run;

/* B2: IDs 1, 4, 5, 6 (ID 1 also appears in B1). */
data B2;
  input ID $
        age
        gender $;
  datalines;
1 20 F
4 40 M
5 60 F
6 75 M
;
run;

/* SET with two data sets and no BY statement stacks them:
   all rows of B1 followed by all rows of B2. */
data B1B2;
  set B1
      B2;
run;

/* Slide 19: interleave B1 and B2. With a BY statement, SET reads
   the sorted inputs so that the output rows come out in ID order
   rather than as B1 followed by B2. */
proc sort data=B1;
  by id;
run;

proc sort data=B2;
  by id;
run;

data I_B1B2;
  set B1
      B2;
  by id;
run;

/* Slide 21: remove duplicate keys with PROC SORT NODUPKEY. */

/* C1: one row per (id, hosp_id, year); several ids occur more
   than once and one row (2 1 2000) is repeated exactly. */
data C1;
  input id $
        hosp_id $
        year;
  datalines;
3 3 1998
1 1 2000
2 1 2000
2 1 2000
1 2 2003
1 1 2008
4 2 2002
3 4 2010
;
run;

/* Keep a single row for each id. The result goes to C4 (OUT=),
   so C1 itself is left unchanged. */
proc sort data=C1 out=C4 nodupkey;
  by id;
run;

/* Slide 22: keep a single row for each id / hosp_id combination.
   The result is written to C5; C1 is left unchanged. */
proc sort data=C1 out=C5 nodupkey;
  by id hosp_id;
run;

/* Slide 23: same de-duplication by id and hosp_id, but with no
   OUT= option, so C1 itself is replaced by the sorted,
   de-duplicated result. */
proc sort data=C1 nodupkey;
  by id hosp_id;
run;

/* Slide 26: de-duplicate a data set from a permanent library by id.
   Two ways are shown; both write their result to id2002_1. */
libname exp1 'F:\NHIRD TeachDB2015\TID';

/* Way 1: sort directly from the library; OUT= keeps the stored
   data set exp1.id2002 untouched. */
proc sort data=exp1.id2002 out=id2002_1 nodupkey;
  by id;
run;

/* Way 2: copy the data set to a one-level name first, then
   de-duplicate the copy. */
data id2002;
  set exp1.id2002;
run;

proc sort data=id2002 out=id2002_1 nodupkey;
  by id;
run;

/* Slide 27: stack the data sets id2002 through id2005 from the
   exp1 library (numbered range list) into idall, then keep one
   row per id in idall_1. */
data idall;
  set exp1.id2002-exp1.id2005;
run;

proc sort data=idall out=idall_1 nodupkey;
  by id;
run;

/* Slide 33: select records whose diagnosis codes start with
   "250" or "A181".
   NOTE(review): presumably these are the diabetes codes, given the
   output name dm1 - confirm against the code book. */
libname cd 'F:\NHIRD TeachDB2015\TCD';

/* Copy the 2001 data set from the library to a one-level name. */
data cd2001;
  set cd.cd2001;
run;

data dm1;
  set cd2001 (keep=id icd9_1-icd9_3);

  /* Check each of the three diagnosis fields in turn. */
  array dx_code{3} icd9_1-icd9_3;
  _hit = 0;
  do _k = 1 to 3;
    if substr(dx_code{_k}, 1, 3) = "250"
       or substr(dx_code{_k}, 1, 4) = "A181" then _hit = 1;
  end;

  /* Keep the record when any field matched; the helper variables
     are not written to dm1. */
  if _hit;
  drop _hit _k;
run;