A Data Scientist's blog: Sunday, April 08, 2012

Sunday, April 08, 2012

R programming # 7

# 1. Consider the data set “FRWRD.txt” containing values in US $ of the 36 Henry Hub

# natural gas forward contracts traded on March 23rd 1998. Use different cross validation

# technique to choose best polynomial (from degree 3, 6 and 8) for fitting the data. Use

# 1 to 36 as explanatory variable for the sake of simplicity. (Use R)

cval=function()

{

library(DAAG)

require(graphics)

mydata=read.table("FRWRD.txt",header=FALSE);

data=mydata[2];

y=gl(6, 6, length = 36, labels = 1:6, ordered = FALSE)

m=seq(1,36,length=36);

# Multiple Linear Regression

data=t(data);

x=c(data);

fit = lm(x~m)

summary(fit) # show results

# Other useful functions

#coefficients(fit) # model coefficients

#confint(fit, level=0.95) # CIs for model parameters

#fitted(fit) # predicted values

#residuals(fit) # residuals

#anova(fit) # anova table

#vcov(fit) # covariance matrix for model parameters

#influence(fit) # regression diagnostics

# diagnostic plots

layout(matrix(c(1,2,3,4),2,2)) # optional 4 graphs/page

plot(fit)

# K-fold cross-validation

data=cbind(y,m,x);

data=data.frame(data)

cv.lm(df=data, form.lm=formula(x~m), m=3, dots=FALSE, seed=50, plotit=TRUE, printit=TRUE) # 3 fold cross-validation

cv.lm(df=data, form.lm=formula(x~m), m=6, dots=FALSE, seed=50, plotit=TRUE, printit=TRUE) # 6 fold cross-validation

cv.lm(df=data, form.lm=formula(x~m), m=8, dots=FALSE, seed=50, plotit=TRUE, printit=TRUE) # 8 fold cross-validation

cv.lm(df=data, form.lm=formula(x~m), m=10, dots=FALSE, seed=50, plotit=TRUE, printit=TRUE) # 10 fold cross-validation

}

Q No. 2

Use the dataset ”longley” from R data sets. This is a data frame with 7 economical
variables, observed yearly from 1947 to 1962 (n=16). Perform multiple regression to
see how number of people employed depends on other six variables mentioned in the
1
data set. Find out the leverage points, influencial points, outliers (if any) and the best
fitted model. (Use SAS)

proc reg data="longley"  ;
  model api00 = ell meals yr_rnd mobility acs_k3 acs_46 full emer enroll ;
run;

proc reg data="longley"  ;
  model api00 = ell meals yr_rnd mobility acs_k3 acs_46 full emer enroll / stb;
run;

proc reg data="longley"  ;
  model api00 = ell meals yr_rnd mobility acs_k3 acs_46 full emer enroll ;
  test1: test ell =0;
run;

proc reg data="longley"  ;
  model api00 = ell meals yr_rnd mobility acs_k3 acs_46 full emer enroll / stb;
  test2: test ell;
run;

proc reg data="longley"  ;
  model api00 = ell meals yr_rnd mobility acs_k3 acs_46 full emer enroll ;
  test_class_size: test acs_k3, acs_46;
run;

proc corr data="longley"  ;
  var api00 ell meals yr_rnd mobility acs_k3 acs_46 full emer enroll ;
run;

R Programming # 6

# 1. Draw the surface plot of bivariate gumbel copula with parameter

# equals to 1.5 and bivariate normal copula with parameter equals to 0.7.

surface=function()

{

library("copula");

gc=gumbelCopula(1.5, dim=2)

persp(gc, dcopula, col="red")

nc=normalCopula(0.7)

persp(nc, dcopula, col="blue")

}

=============================

# 2. Draw the contour plot of the density and cdf of bivariate gumbel

#copula with with parameter 1.4 and marginals N(3,4) and T(3).

cont=function()

{

library("copula");

#Density contour Plot

density = mvdc(copula = archmCopula(family = "gumbel", param = 1.4), margins = c("norm", "t"), paramMargins = list(list(mean = 0, sd = 1), list(df=3, ncp = 3)))

print(density)

contour(density, dmvdc, xlim = c(-2, 8), ylim = c(-2, 10)) ;

# CDF contour Plot

cdf=mvdc(copula = archmCopula(family = "gumbel", param = 1.4), margins = c("norm", "t"), paramMargins = list(list(mean = 0, sd = 1), list(df=3, ncp = 3)))

contour(cdf, pmvdc, xlim = c(-2, 20), ylim = c(-2, 20))

}

==============

# 3. Consider the monthly log returns of Boeing (BA), Abbott Labs (ABT),

# Mo- torola (MOT) and General Motors (GM) from January 1998 to December

# 2007. The log returns are in percentages.

# (a) Calculate the correlation co-efficient for MOT and GM.

# Generate 500 random number from gaussian copula taking parameter as

# this correlation coefficient. Draw contour plot of the empirical

# copula based on the generated sample and superimpose the empirical

# copula based on the data for MOT and GM.

corltn = function()

{

####### --------- Lybraries ------------#####

library("copula")

library("fCopulae")

data_set=read.table("m-ba4c9807.txt",header=TRUE);

BA=data_set[1];

ABT=data_set[2];

MOT=data_set[3];

GM=data_set[4];

MOTGM=data_set[3:4];

BA=t(BA);

ABT=t(ABT);

MOT=t(MOT);

GM=t(GM);

# 4 figures arranged in 2 rows and 2 columns

attach(mtcars)

par(mfrow=c(2,2))

rr=cor(t(MOT),t(GM), use = "everything", method = "pearson")

nc = normalCopula(-0.1231763)

r = rcopula(nc, 500)

# Probablity Empirical Copula:

empg=pempiricalCopula(r, N = 10)

contour(empg, pcopula, main="Probability Empirical contour plot of random gaussian data")

#par(new=TRUE)

empd=pempiricalCopula(MOTGM, N = 10)

contour(empd, pcopula, main="Probability Empirical contour plot of MOT and GM data")

# Density Empirical Copula:

empg=dempiricalCopula(r, N = 10)

contour(empg, dcopula, main="Density Empirical contour plot of random gaussian data")

#par(new=TRUE)

empd=dempiricalCopula(MOTGM, N = 10)

contour(empd, dcopula, main="Density Empirical contour plot of MOT and GM data")

}

===============================

****************** END ******************

Search This Blog

Sunday, April 08, 2012

R programming # 7

R Programming # 6