*** WEIGHTED LEAST SQUARES ;

* Read the sheet prost of the Excel workbook {prostmod.xls} into the
* data set prostate, taking the variable names from the first row ;
PROC IMPORT DATAFILE = 'C:\Documents and Settings\richc\My Documents\CPH931F09\prostmod.xls'
            OUT = prostate
            DBMS = EXCEL
            REPLACE;
    SHEET = prost;
    GETNAMES = YES;
RUN;

* Start a transcript of the results in the Word file {prostresults.rtf} ;
ODS RTF FILE = 'C:\Documents and Settings\richc\My Documents\CPH931F09\prostresults.rtf';

* Fit a linear regression model via ordinary least squares.
* The response variable is prostate specific antigen (after
* square root transformation). The explanatory variables
* are cancer volume (after logarithmic transformation) and
* prostate weight (after logarithmic transformation).
*
* The model is fitted once only. The same step
*   - plots ordinary and studentized residuals against the
*     predicted/fitted values of prostate specific antigen, and
*   - creates the data set prostatenew in which the ordinary
*     residuals and the predicted/fitted values are recorded as
*     the variables RESID and PRED.
* Fitting the model a second time just to obtain the OUTPUT data
* set would repeat every table in the transcript ;
PROC REG DATA = prostate;
    MODEL SPSA = LCAVOL LWEIGHT / VIF CORRB ALPHA = 0.05;
    PLOT R. * P.;
    PLOT STUDENT. * P.;
    OUTPUT OUT = prostatenew R = RESID P = PRED;
RUN;

* Fit an auxiliary linear regression model in which the
* absolute values of the residuals are treated as roughly
* linear in the predicted/fitted values of prostate
* specific antigen. Record the predicted/fitted values
* from the auxiliary model as the variable AUXPRED ;
DATA prostatenew;
    SET prostatenew;
    ABSRESID = abs(RESID);
RUN;

PROC REG DATA = prostatenew;
    MODEL ABSRESID = PRED;
    OUTPUT OUT = prostatenew2 P = AUXPRED;
RUN;

* Now fit a linear regression model with weighted least
* squares, using AUXPRED to define the weights as the
* reciprocal of AUXPRED squared.
* The weight is left missing when AUXPRED is missing or zero.
* That is the value the unguarded division would produce as well,
* but the guard avoids the division by zero and missing value
* notes in the log ;
DATA prostatenew2;
    SET prostatenew2;
    IF NOT MISSING(AUXPRED) AND AUXPRED NE 0 THEN
        WEIGHTSFORWLS = 1/(AUXPRED*AUXPRED);
RUN;

* PROC REG is an interactive procedure, so it is ended explicitly
* with QUIT before the RTF destination is closed ;
PROC REG DATA = prostatenew2;
    MODEL SPSA = LCAVOL LWEIGHT / VIF CORRB ALPHA = 0.05;
    WEIGHT WEIGHTSFORWLS;
RUN;
QUIT;

* NOTE - the comment on the last line is intentionally left without a
* terminator here because the semicolon that opens the next line
* completes it ;
* End Word transcript.
;
* The lone semicolon above completes the comment that the preceding
* line leaves open. Close the RTF destination that is currently open ;
ODS RTF CLOSE;
RUN;

*** RIDGE REGRESSION ;

* Import the sheet Cholesterol of the Excel workbook {Cholesterol.xls}
* as the data set chol, with variable names taken from the first row ;
PROC IMPORT
    DATAFILE = 'C:\Documents and Settings\richc\My Documents\CPH931F09\Cholesterol.xls'
    OUT      = chol
    DBMS     = EXCEL
    REPLACE;
    SHEET    = Cholesterol;
    GETNAMES = YES;
RUN;

* Open the Word file {cholresults.rtf} to receive the results ;
ODS RTF FILE = 'C:\Documents and Settings\richc\My Documents\CPH931F09\cholresults.rtf';

* Ordinary least squares fit. Total serum cholesterol is the
* response. Total fat intake, saturated fat intake, vegetable fat
* intake, polyunsaturated fat intake and animal fat intake are the
* explanatory variables ;
PROC REG DATA = Chol;
    MODEL TC = TOTFAT SATFAT VEGFAT POLYFAT ANIMFAT
          / VIF CORRB ALPHA = 0.05;
RUN;

* Ridge trace used to pick a suitable ridge parameter. The ridge
* parameter runs from 0 to 0.02 in steps of 0.002 and the estimates
* are stored in the data set b. The SYMBOL and LEGEND statements
* set the plotting symbol and colour of each coefficient trace and
* the layout of the legend ;
TITLE 'Ridge Trace of Cholesterol Data';

SYMBOL1 VALUE = x        COLOR = blue;
SYMBOL2 VALUE = circle   COLOR = yellow;
SYMBOL3 VALUE = square   COLOR = cyan;
SYMBOL4 VALUE = triangle COLOR = green;
SYMBOL5 VALUE = plus     COLOR = orange;

LEGEND2 POSITION = (bottom right inside)
        ACROSS   = 3
        CBORDER  = black
        OFFSET   = (0,0)
        LABEL    = (color=blue position=(top center) 'coefficient estimates')
        CFRAME   = white;

PROC REG DATA = Chol OUTEST = b RIDGE = 0 to 0.02 by 0.002;
    MODEL TC = TOTFAT SATFAT VEGFAT POLYFAT ANIMFAT / NOPRINT;
    PLOT / RIDGEPLOT NOMODEL LEGEND = legend2 NOSTAT
           VREF = 0 LVREF = 1 CVREF = blue CFRAME = ligr;
RUN;

* Ridge regression fit at the chosen ridge parameter 0.002. The
* estimates and their standard errors are written to the data set
* ridgeresults, which is then printed ;
TITLE '';

PROC REG DATA = Chol OUTSEB OUTEST = ridgeresults RIDGE = 0.002;
    MODEL TC = TOTFAT SATFAT VEGFAT POLYFAT ANIMFAT
          / VIF CORRB ALPHA = 0.05;
RUN;

PROC PRINT DATA = ridgeresults;
RUN;

* NOTE - the comment on the last line is intentionally left without a
* terminator here because the semicolon that opens the next line
* completes it ;
* End Word transcript.
;
* The lone semicolon above completes the comment that the preceding
* line leaves open. Close the RTF destination that is currently open ;
ODS RTF CLOSE;
RUN;

*** ROBUST REGRESSION ;

* Import the sheet FEV of the Excel workbook {FEV.xls} as the data
* set pulmonary, with variable names taken from the first row ;
PROC IMPORT
    DATAFILE = 'C:\Documents and Settings\richc\My Documents\CPH931F09\FEV.xls'
    OUT      = pulmonary
    DBMS     = EXCEL
    REPLACE;
    SHEET    = FEV;
    GETNAMES = YES;
RUN;

* Open the Word file {pulmresults.rtf} to receive the results ;
ODS RTF FILE = 'C:\Documents and Settings\richc\My Documents\CPH931F09\pulmresults.rtf';

* Ordinary least squares fit. The response is forced expiratory
* volume (after logarithmic transformation). The explanatory
* variables are age in years, height in inches, gender (1 male,
* 0 female) and smoking status (1 yes, 0 no). Influence diagnostics
* are requested, and RSTUDENT is plotted against the predicted
* values and against each explanatory variable in turn ;
PROC REG DATA = pulmonary;
    MODEL logFEV = Age Hgt Sex Smoke / INFLUENCE;
    PLOT RSTUDENT. * PREDICTED.;
    PLOT RSTUDENT. * AGE;
    PLOT RSTUDENT. * HGT;
    PLOT RSTUDENT. * SEX;
    PLOT RSTUDENT. * SMOKE;
RUN;

* Fit the same model via M estimation (robust method) ;
PROC ROBUSTREG DATA = pulmonary;
    MODEL logFEV = Age Hgt Sex Smoke;
RUN;

* End Word transcript ;
ODS RTF CLOSE;
RUN;