Search This Blog

Sunday, April 08, 2012

R programming # 7


# 1. Consider the data set “FRWRD.txt” containing values in US $ of the 36 Henry Hub
# natural gas forward contracts traded on March 23rd 1998. Use different cross validation
# technique to choose best polynomial (from degree 3, 6 and 8) for fitting the data. Use
# 1 to 36 as explanatory variable for the sake of simplicity. (Use R)

cval=function()
{
library(DAAG)
require(graphics)

mydata=read.table("FRWRD.txt",header=FALSE);
data=mydata[2];
y=gl(6, 6, length = 36, labels = 1:6, ordered = FALSE)
m=seq(1,36,length=36);
# Multiple Linear Regression
data=t(data);
x=c(data);
fit = lm(x~m)
summary(fit) # show results

# Other useful functions
#coefficients(fit) # model coefficients
#confint(fit, level=0.95) # CIs for model parameters
#fitted(fit) # predicted values
#residuals(fit) # residuals
#anova(fit) # anova table
#vcov(fit) # covariance matrix for model parameters
#influence(fit) # regression diagnostics

# diagnostic plots
layout(matrix(c(1,2,3,4),2,2)) # optional 4 graphs/page
plot(fit)

# K-fold cross-validation
data=cbind(y,m,x);
data=data.frame(data)
cv.lm(df=data, form.lm=formula(x~m), m=3, dots=FALSE, seed=50, plotit=TRUE, printit=TRUE) # 3 fold cross-validation
cv.lm(df=data, form.lm=formula(x~m), m=6, dots=FALSE, seed=50, plotit=TRUE, printit=TRUE) # 6 fold cross-validation

cv.lm(df=data, form.lm=formula(x~m), m=8, dots=FALSE, seed=50, plotit=TRUE, printit=TRUE) # 8 fold cross-validation

cv.lm(df=data, form.lm=formula(x~m), m=10, dots=FALSE, seed=50, plotit=TRUE, printit=TRUE) # 10 fold cross-validation
}





Q No. 2
Use the dataset ”longley” from R data sets. This is a data frame with 7 economical
variables, observed yearly from 1947 to 1962 (n=16). Perform multiple regression to
see how number of people employed depends on other six variables mentioned in the
1
data set. Find out the leverage points, influencial points, outliers (if any) and the best
fitted model. (Use SAS)

proc reg data="longley"  ;
  model api00 = ell meals yr_rnd mobility acs_k3 acs_46 full emer enroll ;
run; 


proc reg data="longley"  ;
  model api00 = ell meals yr_rnd mobility acs_k3 acs_46 full emer enroll / stb;
run;


proc reg data="longley"  ;
  model api00 = ell meals yr_rnd mobility acs_k3 acs_46 full emer enroll ;
  test1: test ell =0;
run; 



proc reg data="longley"  ;
  model api00 = ell meals yr_rnd mobility acs_k3 acs_46 full emer enroll / stb;
  test2: test ell;
run; 



proc reg data="longley"  ;
  model api00 = ell meals yr_rnd mobility acs_k3 acs_46 full emer enroll ;
  test_class_size: test acs_k3, acs_46;
run;  



proc corr data="longley"  ;
  var api00 ell meals yr_rnd mobility acs_k3 acs_46 full emer enroll ;
run;


R Programming # 6


# 1. Draw the surface plot of bivariate gumbel copula with parameter
# equals to 1.5 and bivariate normal copula with parameter equals to 0.7.

surface=function()
{

library("copula");

gc=gumbelCopula(1.5, dim=2)
persp(gc, dcopula, col="red")

nc=normalCopula(0.7)
persp(nc, dcopula, col="blue")

}
=============================


# 2. Draw the contour plot of the density and cdf of bivariate gumbel
#copula with with parameter 1.4 and marginals N(3,4) and T(3).

cont=function()
{

library("copula");
#Density contour Plot
density = mvdc(copula = archmCopula(family = "gumbel", param = 1.4), margins = c("norm", "t"), paramMargins = list(list(mean = 0, sd = 1), list(df=3, ncp = 3)))

print(density)

contour(density, dmvdc, xlim = c(-2, 8), ylim = c(-2, 10)) ;

# CDF contour Plot

cdf=mvdc(copula = archmCopula(family = "gumbel", param = 1.4), margins = c("norm", "t"), paramMargins = list(list(mean = 0, sd = 1), list(df=3, ncp = 3)))
contour(cdf, pmvdc, xlim = c(-2, 20), ylim = c(-2, 20))

}

==============





# 3. Consider the monthly log returns of Boeing (BA), Abbott Labs (ABT),
# Mo- torola (MOT) and General Motors (GM) from January 1998 to December
# 2007. The log returns are in percentages.
# (a) Calculate the correlation co-efficient for MOT and GM.
# Generate 500 random number from gaussian copula taking parameter as
# this correlation coefficient. Draw contour plot of the empirical
# copula based on the generated sample and superimpose the empirical
# copula based on the data for MOT and GM.

corltn = function()
{

####### --------- Lybraries ------------#####
library("copula")
library("fCopulae")
data_set=read.table("m-ba4c9807.txt",header=TRUE);
BA=data_set[1];
ABT=data_set[2];
MOT=data_set[3];
GM=data_set[4];
MOTGM=data_set[3:4];
BA=t(BA);
ABT=t(ABT);
MOT=t(MOT);
GM=t(GM);
# 4 figures arranged in 2 rows and 2 columns
attach(mtcars)
par(mfrow=c(2,2))
rr=cor(t(MOT),t(GM), use = "everything", method = "pearson")
nc = normalCopula(-0.1231763)
r = rcopula(nc, 500)

# Probablity Empirical Copula:
empg=pempiricalCopula(r, N = 10)
contour(empg, pcopula, main="Probability Empirical contour plot of random gaussian data")

#par(new=TRUE)
empd=pempiricalCopula(MOTGM, N = 10)
contour(empd, pcopula, main="Probability Empirical contour plot of MOT and GM data")

# Density Empirical Copula:
empg=dempiricalCopula(r, N = 10)
contour(empg, dcopula, main="Density Empirical contour plot of random gaussian data")

#par(new=TRUE)
empd=dempiricalCopula(MOTGM, N = 10)
contour(empd, dcopula, main="Density Empirical contour plot of MOT and GM data")

}


===============================



****************** END ******************