*** HANDLING MISSING DATA ;

* Folder that holds the input workbook Mercury.xls and receives the
* transcript Mercury.rtf. Edit this one line to run the program from a
* different location. ;
%LET course_dir = C:\Documents and Settings\richc\My Documents\CPH931F08;

* Read in the data from the Mercury sheet of Mercury.xls, taking the
* variable names from the first row. ;
PROC IMPORT DATAFILE = "&course_dir\Mercury.xls"
            OUT      = Mercury
            DBMS     = EXCEL
            REPLACE;
    SHEET    = Mercury;
    GETNAMES = YES;
RUN;

* Create a log-transformed version of 3-year standard mercury. Rows where
* the raw value is missing stay missing on the log scale. ;
DATA Mercury;
    SET Mercury;
    LOGMERCURY = LOG(__YR_STANDARD_MERCURY);
RUN;

* Start a transcript of all following output in the Word file Mercury.rtf. ;
ODS RTF FILE = "&course_dir\Mercury.rtf";

* Fit a linear regression model with log-transformed 3-year standard mercury
* as the response variable and PH as the sole explanatory variable. Display a
* scatterplot of log-transformed 3-year standard mercury against PH. Write a
* new dataset MercuryNew holding the predicted values (PRED) and the
* residuals (RESID) from the model. ;
PROC REG DATA = Mercury;
    MODEL LOGMERCURY = PH;
    PLOT LOGMERCURY * PH;
    OUTPUT OUT = MercuryNew R = RESID P = PRED;
RUN;
* PROC REG is interactive, so end it explicitly instead of relying on the
* next step to do so. ;
QUIT;

* Single imputation. Replace missing values on log-transformed 3-year
* standard mercury by the predicted values from the regression model.
* MISSING also recognises special missing values, not only the plain dot. ;
DATA MercuryNew;
    SET MercuryNew;
    IF MISSING(LOGMERCURY) THEN LOGMERCURY = PRED;
RUN;

* Refit the linear regression model, having replaced missing values on
* log-transformed 3-year standard mercury by predicted values. ;
PROC REG DATA = MercuryNew;
    MODEL LOGMERCURY = PH;
    PLOT LOGMERCURY * PH;
RUN;
QUIT;

* Multiple imputation. Going back to the data that still has missing values
* on log-transformed 3-year standard mercury, impute them five times. The
* output dataset IMPUTED stacks the five completed datasets, which are told
* apart by the variable _IMPUTATION_. The fixed seed makes the imputations
* reproducible. ;
PROC MI DATA = Mercury SEED = 12345 NIMPUTE = 5 OUT = imputed;
    MCMC CHAIN = MULTIPLE DISPLAYINIT INITIAL = EM(ITPRINT);
    VAR LOGMERCURY PH;
RUN;

PROC PRINT DATA = imputed;
    VAR LOGMERCURY PH;
RUN;

* Fit five linear regression models, one for each completed dataset. The
* parameter estimates and their covariance matrices are saved in OUTREG. ;
PROC REG DATA = imputed OUTEST = outreg COVOUT;
    MODEL LOGMERCURY = PH;
    BY _IMPUTATION_;
RUN;
QUIT;

* Combine the parameter estimates and standard errors from the five linear
* regression models to obtain a single set of overall parameter estimates
* and standard errors. MODELEFFECTS is the SAS 9 name for the statement that
* earlier releases called VAR. ;
PROC MIANALYZE DATA = outreg;
    MODELEFFECTS INTERCEPT PH;
RUN;

* End the Word transcript. ;
ODS RTF CLOSE;