# 1. Consider the data set “FRWRD.txt” containing
values in US $ of the 36 Henry Hub
# natural gas forward contracts traded on March 23rd
1998. Use different cross validation
# technique to choose best polynomial (from degree 3, 6
and 8) for fitting the data. Use
# 1 to 36 as explanatory variable for the sake of
simplicity. (Use R)
cval=function()
{
library(DAAG)
require(graphics)
mydata=read.table("FRWRD.txt",header=FALSE);
data=mydata[2];
y=gl(6, 6, length = 36, labels = 1:6, ordered = FALSE)
m=seq(1,36,length=36);
# Multiple Linear Regression
data=t(data);
x=c(data);
fit = lm(x~m)
summary(fit) # show results
# Other useful functions
#coefficients(fit) # model coefficients
#confint(fit, level=0.95) # CIs for model parameters
#fitted(fit) # predicted values
#residuals(fit) # residuals
#anova(fit) # anova table
#vcov(fit) # covariance matrix for model parameters
#influence(fit) # regression diagnostics
# diagnostic plots
layout(matrix(c(1,2,3,4),2,2)) # optional 4 graphs/page
plot(fit)
# K-fold cross-validation
data=cbind(y,m,x);
data=data.frame(data)
cv.lm(df=data, form.lm=formula(x~m), m=3, dots=FALSE,
seed=50, plotit=TRUE, printit=TRUE) # 3 fold cross-validation
cv.lm(df=data, form.lm=formula(x~m), m=6, dots=FALSE,
seed=50, plotit=TRUE, printit=TRUE) # 6 fold cross-validation
cv.lm(df=data, form.lm=formula(x~m), m=8, dots=FALSE,
seed=50, plotit=TRUE, printit=TRUE) # 8 fold cross-validation
cv.lm(df=data, form.lm=formula(x~m), m=10, dots=FALSE,
seed=50, plotit=TRUE, printit=TRUE) # 10 fold cross-validation
}
Q No. 2
Use the dataset ”longley” from R data sets. This is a data frame with 7 economical
variables, observed yearly from 1947 to 1962 (n=16). Perform multiple regression to
see how number of people employed depends on other six variables mentioned in the
1
data set. Find out the leverage points, influencial points, outliers (if any) and the best
fitted model. (Use SAS)
variables, observed yearly from 1947 to 1962 (n=16). Perform multiple regression to
see how number of people employed depends on other six variables mentioned in the
1
data set. Find out the leverage points, influencial points, outliers (if any) and the best
fitted model. (Use SAS)
proc reg data="longley" ; model api00 = ell meals yr_rnd mobility acs_k3 acs_46 full emer enroll ; run;
proc reg data="longley" ; model api00 = ell meals yr_rnd mobility acs_k3 acs_46 full emer enroll / stb; run;
proc reg data="longley" ; model api00 = ell meals yr_rnd mobility acs_k3 acs_46 full emer enroll ; test1: test ell =0; run;
proc reg data="longley" ; model api00 = ell meals yr_rnd mobility acs_k3 acs_46 full emer enroll / stb; test2: test ell; run;
proc reg data="longley" ; model api00 = ell meals yr_rnd mobility acs_k3 acs_46 full emer enroll ; test_class_size: test acs_k3, acs_46; run;
proc corr data="longley" ; var api00 ell meals yr_rnd mobility acs_k3 acs_46 full emer enroll ; run;