Chapter B. An Introduction to R


Copyright 2016 Jon Danielsson. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at. http://www.apache.org/licenses/LICENSE-2.0. Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

Listing B.1: Variables
Last edited: 2011

x = 10  
x        
10      
		

Listing B.2: Vectors
Last edited: 2011

y = c(1,3,5,7,9)
y
1 3 5 7 9
y[3] 
5
dim(y)
NULL    
length(y)  
[1] 5
		

Listing B.3: Matrices
Last edited: 2011

y = matrix(nrow=2,ncol=3) 
y  
  [,1] [,2] [,3]
[1,]   NA   NA   NA
[2,]   NA   NA   NA
dim(y)
2 3
y = matrix(c(1,2,3),3,1) 
		

Listing B.4: Sequences
Last edited: 2011

seq(1:10)
[1]  1  2  3  4  5  6  7  8  9 10
seq(from=1, to=10, by=2)
[1]  1 3 5 7 9
seq(from=1, to=10, length=5)
[1]  1.00  3.25  5.50  7.75 10.00
		

Listing B.5: Import CVS data
Last edited: 2011

mydata = read.table("data.txt")
mydata = read.csv("data.csv")
		

Listing B.6: Import data with scan and matrix
Last edited: 2011

mydata = matrix(scan(file="data.dat"),byrow=TRUE,ncol=3)
		

Listing B.7: Download S&P-500 data in R
Last edited: 2011

library("tseries")  
price = get.hist.quote(instrument = "^gspc", start = "2000-01-01", quote="AdjClose") 
y=diff(log(price))    
y=coredata(y)
plot(y)                
		

Listing B.8: Basic data manipulation
Last edited: 2011

sum(y)       
prod(y)     
max(y)      
min(y)      
range(y)    
length(y)    
mean(y)      
median(y)    
var(y)      
cov(y)      
cor(y)      
sort(y)      
log(y)      
na.omit(y)  
unique(y)    
		

Listing B.9: Higher moments
Last edited: 2011

mean(y)
var(y)
sd(y)    
library(moments)
skewness(y)
kurtosis(y)
		

Listing B.10: Matrix multiplication
Last edited: 2011

z = matrix(c(1,2,3,4),2,2)   
x = matrix(c(1,2),1,2)      
z %*% x
Error in z %*% x : non-conformable arguments  
z %*% t(x)  
     [,1]
[1,]    7
[2,]   10
		

Listing B.11: Matrix manipulation
Last edited: 2011

m1 = matrix(c(1,2,3,4),2,2)   
m2 = matrix(1,nrow=2,ncol=2)   
rbind(m1,m2)
     [,1] [,2]
[1,]    1    3
[2,]    2    4
[3,]    1    1
[4,]    1    1
cbind(m1,m2)
     [,1] [,2] [,3] [,4]
[1,]    1    3    1    1
[2,]    2    4    1    1
diag(m1)
[1] 1 4
diag(2)
     [,1] [,2]
[1,]    1    0
[2,]    0    1
solve(m1)
     [,1] [,2]
[1,]   -2  1.5
[2,]    1 -0.5
eigen(m1)
$values
[1]  5.3722813 -0.3722813
$vectors
           [,1]       [,2]
[1,] -0.5657675 -0.9093767
[2,] -0.8245648  0.4159736
		

Listing B.12: Distribution functions
Last edited: 2011

q=seq(from=-3,to=-3,length=300)     
p=seq(from=0.01,to=0.99,length=300) 
rnorm(100, mean=0, sd=1)             
pnorm(q, mean=0, sd=1)               
dnorm(q, mean=0, sd=1)               
qnorm(p, mean=0, sd=1)               
		

Listing B.13: Common distributions
Last edited: 2011

S=10                            
df=3                            
rt(S,df)                         
rlnorm(S)                       
runif(S,min=0,max=1)             
rchisq(S,df)                     
rbinom(S,size=4,prob=0.1)        
rpois(S,lambda=0.1)             
rexp(S,rate=1)                   
rgamma(S,shape=2,scale=1)       
rweibull(S,shape=2,scale=1)     
rcauchy(S,location=0,scale=1)   
		

Listing B.14: Other distributions
Last edited: 2011

library(MASS)
mu = c(1,1)       
Sigma = matrix(c(1, 0.5, 0.5, 2),ncol=2) 
mvrnorm(S,mu,Sigma)           
library(mvtnorm)
rmvt(S,Sigma,df)             
library(Rlab)
rbern(S, prob=0.4)      Bernoulli
library(evir)
rgev(S, xi=1, mu=0, sigma=1)  
rgpd(S, xi=2, mu=0, beta=1)   
		

Listing B.15: Testing for normality
Last edited: 2011

library(moments) 
jarque.bera.test(y)
  Jarque-Bera Normality Test
data:  y 
JB = 339444.9, p-value < 2.2e-16
library(car)
qq.plot(y, distribution="norm",mean=0,sd=1) 
qq.plot(y, distribution="t",df=3) 
		

Listing B.16: ACF
Last edited: 2011

library(stats)
acf(y, lag.max=20, plot=TRUE) 
acf(y, lag.max=20, type="partial", plot=TRUE) 
pacf(y, lag.max=20, plot=TRUE) 
Box.test(y,lag=20,type="Ljung-Box") 
X-squared = 142.3885, df = 20, p-value < 2.2e-16
		

Listing B.17: ARMA models
Last edited: 2011

arima(y, order = c(1, 0, 0),include.mean = TRUE) 
arima(y, order = c(0, 0, 1),include.mean = FALSE) 
arima(y, order = c(1, 0, 1),include.mean = TRUE) 
Call:
arima(x = y, order = c(1, 0, 1), include.mean = TRUE)
Coefficients:
          ar1     ma1  intercept
      -0.4580  0.5002      2e-04
s.e.   0.0775  0.0754      1e-04
sigma^2 estimated as 0.0001349:  log likelihood = 64926.08,  aic = -129844.2
		

Listing B.18: Simulate ARMA
Last edited: 2011

x = arima.sim(list(order=c(1,0,0),ar=0.8),n=10) 
Time Series:
Start = 1 
End = 10 
Frequency = 1 
 [1] -0.3457084  1.6319438 -1.1513445 -1.2760566  0.1160679  0.5026084  1.4810065  0.8608933 -0.3298654  1.3049195
x1 = arima.sim(list(order=c(2,0,2), ar=c(0.9, -0.5), ma=c(-0.2, 0.2)),n=300) 
		

Listing B.19: A simple function
Last edited: 2011

mykurtosis = function(x) {  
   m4 = mean((x-mean(x))^4) 
   kurt = m4/(sd(x)^4)-3  
   kurt
}  
mykurtosis(y)
[1] 7.40799
		

Listing B.20: Programming kurtosis
Last edited: 2011

mykurtosis1 = function(x,excess=3) {  
   m4 = mean((x-mean(x))^4) 
   kurt = m4/(sd(x)^4)-excess  
   kurt
}
mykurtosis1(y)
[1] 7.40799
mykurtosis1(y,0)
[1] 10.40799
		

Listing B.21: A for loop
Last edited: 2011

x = rnorm(5) 
z = numeric(length(x)) 
for (i in 1:length(x)){
  z[i]=x[i]+0.5
}
		

Listing B.22: An if-else loop
Last edited: 2011

a = 10
if (a %% 3==0) {
  print("a is a multiple of 3") 
}else{
  print("a is not a multiple of 3")
}
		

Listing B.23: A while loop
Last edited: 2011

a = 1
n = 1
while (a<100){
  a = a*n
  n = n + 1
}
		

Listing B.24: Loop avoidance
Last edited: 2011

x = rnorm(5) 
for (i in 1:length(x)) if(x[i]<0) x[i]=0
x[x<0] = 0
		

Listing B.25: Normal likelihood function
Last edited: 2011

 norm_loglik = function(theta,x){
   n = length(x)
   mu = theta[1]
   sigma2 = theta[2]^2
   loglike = -0.5*n*log(sigma2) - 0.5*(sum((x-mu)^2/sigma2)) 
   return(-loglike)
}
		

Listing B.26: Maximum likelihood example
Last edited: 2011

x = rnorm(100,mean=3,sd=2)
theta.start = c(median(x),IQR(x)/2) 
out = nlm(norm_loglik,theta.start,x=x,hessian=TRUE,iterlim=100)
out
$minimum  
[1] 115.8623
$estimate  
[1]  3.130798 -1.932131
$gradient  
[1] -2.414775e-06  9.414420e-07
$hessian
             [,1]         [,2]
[1,] 26.787168484  0.004294851
[2,]  0.004294851 53.588122739
$code
[1] 1
$iterations 
[1] 16
  solve(out$hessian) 
              [,1]          [,2]
[1,]  3.733131e-02 -2.991939e-06
[2,] -2.991939e-06  1.866085e-02
		

Listing B.27: Save plot in R
Last edited: 2011

postscript("file.eps", horizontal=FALSE, onefile=FALSE, height=5, width=5, pointsize=10, useKerning=FALSE)
plot(y,type='l')
dev.off()