14  Backtesting

14.1 Libraries

source('functions.r')
suppressPackageStartupMessages(library(rugarch))
suppressPackageStartupMessages(library(lubridate))
suppressPackageStartupMessages(library(zoo))
library(parallel)

14.2 Data

load('data/data.ts.RData')
sp500=data.ts$sp500
Ticker=data.ts$Ticker
names(data)
names(sp500)
  1. 'Return'
  2. 'Price'
  3. 'UnAdjustedPrice'
  4. 'sp500'
  5. 'sp500tr'
  6. 'Ticker'
  7. 'Price.ts'
  8. 'Return.ts'
  9. 'UnAdjustedPrice.ts'
  1. 'date'
  2. 'price'
  3. 'y'
  4. 'date.ts'
  5. 'y.ts'
  6. 'price.ts'

14.3 Setup

Specify the VaR probability, portfolio value, estimation window size and portfolio weight:

p <- 0.01
value = 1000
WE = 2000
T=length(sp500$y)
WT=T-WE
w = rep(1/length(Ticker),length(Ticker))
cat("p=",p,", value=",value,", WE=",WE,"T=",T,"WT=",WT,", w:",w,"\n")
p= 0.01 , value= 1000 , WE= 2000 T= 5034 WT= 3034 , w: 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 

14.4 Forecast VaR

14.5 Historical simulation — HS

DEBUG=FALSE

violations=rep(NA,length=T)
VaR=rep(NA,length=T)
for(i in (WE+1):T){
    t1=i-WE
    t2=i-1
    data=sp500$y[t1:t2]
    if(DEBUG) cat(i,t1,t2,length(data),"\n")
    VaR[i]=Risk.HS(y=data,WE=WE,p=p,value=value,Print=FALSE)$risk$VaR

}
plot(VaR,type='s')

If we want to do this for several methods it is better to do is all in the same loop as that minimises the chance of mistakes. This will take a long time, and we print the estimation time at the end.

DEBUG=FALSE
START=WE+1

time1=Sys.time()
sp500$VaR.HS=rep(NA,length=T)
sp500$VaR.HS500=rep(NA,length=T)

sp500$VaR.EWMA=rep(NA,length=T)
sp500$VaR.GARCH=rep(NA,length=T)
sp500$VaR.tGARCH=rep(NA,length=T)

for(i in START:T){
    t1=i-WE
    t2=i-1
    data=sp500$y[t1:t2]
    if(DEBUG) cat(i,t1,t2,"/",T,length(data),"\n")
    sp500$VaR.HS[i]=Risk.HS(y=data,WE=WE,p=p,value=value,Print=FALSE)$risk$VaR
    sp500$VaR.HS500[i]=Risk.HS(y=data,WE=500,p=p,value=value,Print=FALSE)$risk$VaR
    sp500$VaR.EWMA[i]=Risk.EWMA(y=data,WE=WE,p=p,value=value,Print=FALSE)$risk$VaR
    sp500$VaR.GARCH[i]=Risk.GARCH(y=data,WE=WE,p=p,value=value,Print=FALSE)$risk$VaR
    sp500$VaR.tGARCH[i]=Risk.tGARCH(y=data,p=p,WE=WE,value=value,Print=FALSE)$risk$VaR
}
time2=Sys.time()

This will take quite some time to calculate. We could speed it up in several ways. Use multi-core calculations, and perhaps use our own algo, and feed into it as starting values the result from the previous days optimisation.

print(time2-time1)
Time difference of 11.60907 mins

At the end of this Chapter we show how multi-core calculations can be used to make it much faster.

14.5.1 Presenting results

Some columns in sp500 have VaR in the name. We want to plot those, and can manually put those into the plot command. But it is safer to search for those columns and plot them

print(head(sp500,2))
print(names(sp500))
VaR=grep("VaR",names(sp500))
print(VaR)
print(tail(sp500[,VaR],2))
      date  price             y    date.ts          y.ts price.ts VaR.HS
2 20030103 908.59 -0.0004841496 2003-01-03 -0.0004841496   908.59     NA
3 20030106 929.01  0.0222255557 2003-01-06  0.0222255557   929.01     NA
  VaR.HS500 VaR.EWMA VaR.GARCH VaR.tGARCH
2        NA       NA        NA         NA
3        NA       NA        NA         NA
 [1] "date"       "price"      "y"          "date.ts"    "y.ts"      
 [6] "price.ts"   "VaR.HS"     "VaR.HS500"  "VaR.EWMA"   "VaR.GARCH" 
[11] "VaR.tGARCH"
[1]  7  8  9 10 11
     VaR.HS VaR.HS500 VaR.EWMA VaR.GARCH VaR.tGARCH
5034 35.758    53.222   30.682    26.042     30.241
5035 35.758    53.222   30.535    29.433     33.376

We can then plot the VaR columns.

print(head(sp500))
plot(sp500$date.ts,sp500$VaR.EWMA)
      date  price             y    date.ts          y.ts price.ts VaR.HS
2 20030103 908.59 -0.0004841496 2003-01-03 -0.0004841496   908.59     NA
3 20030106 929.01  0.0222255557 2003-01-06  0.0222255557   929.01     NA
4 20030107 922.93 -0.0065661110 2003-01-07 -0.0065661110   922.93     NA
5 20030108 909.93 -0.0141857185 2003-01-08 -0.0141857185   909.93     NA
6 20030109 927.57  0.0192005899 2003-01-09  0.0192005899   927.57     NA
7 20030110 927.57  0.0000000000 2003-01-10  0.0000000000   927.57     NA
  VaR.HS500 VaR.EWMA VaR.GARCH VaR.tGARCH
2        NA       NA        NA         NA
3        NA       NA        NA         NA
4        NA       NA        NA         NA
5        NA       NA        NA         NA
6        NA       NA        NA         NA
7        NA       NA        NA         NA

par(mar=c(3,3,1,4),mfrow=c(1,1))


matplot(sp500$date.ts,
    sp500[,VaR],
    lty=1,
    type='l',
    col=c("blue","red"),
    lwd=c(1,2),
    las=1,
    ylab="returns",
    yaxt='n',
    bty='l'
)
w=pretty(sp500$VaR.HS)

axis(2,at=w,label=paste0("$",w),las=1)

violations=rep(NA,length=T)

eta= sp500$y < -(sp500$VaR.HS/value)
eta=eta[!is.na(eta)]
cat(WT,length(eta),sum(eta),sum(eta)/(WT*p),"\n")
plot(sp500$VaR.HS,type='s',bty='l',las=1,col="red")
3034 3034 35 1.153593 

We can superimpose the VaR onto the returns

par(mar=c(3,3,1,4))
matplot(
    cbind(sp500$y,-sp500$VaR.HS/value),
    lty=1,
    type='l',
    col=c("black","red"),
    lwd=c(1,2),
    las=1,
    ylab="returns",
    yaxt='n',
    bty='u'
)
w=pretty(sp500$y,n=10)
axis(2,at=w,label=paste0(100*w,"%"),las=1)
w=w[w<0]
axis(4,at=w,label=paste0("$",-value*w),las=1)

14.5.2 Zoom into Covid

We can zoom into the Covid-19 period in the first part of 2020. We filtered to the data by not a number (NA) and also create a column,dot, the days when VaR was violated so we can label the plot with red dots below.

res=sp500
res$VaR = -res$VaR.HS/value
res=res[!is.na(res$VaR),]

res$dot=NA
res$dot[res$y < res$VaR] = res$y[ res$y < res$VaR]

There are two main ways we can do the filter by date, either by converting the data into zoo object or simply directly by using the date range. We follow the second way below, but both would work. Note that the code would be different depending on the way we chose.

The zoo and window way.

res=zoo(res,order.by=res$date.ts)
res=window(res,start=ymd(20200201),end=ymd(20200701))
res=res[res$date.ts>=ymd(20200101) &res$date.ts<=ymd(20200701), ]

We then made the main plot.

par(mar=c(3,4,1,6))
matplot(res$date.ts,res[,c("y","VaR")],
     main="Risk in Covid (HS VaR)",
    lty=1,
    type='l',
    col=c("black","red"),
    lwd=c(1,2),
    las=1,
    ylab="returns",
    yaxt='n'
)

points(res$date.ts,res$dot,pch=16,col="red",cex=1.5)

w=pretty(res$y,n=5)
axis(2,at=w,label=paste0(value*w,"%"),las=1)
w=w[w<0]
axis(4,at=w,label=paste0("$",-value*w),las=1)
mtext("return",2,outer = TRUE)
mtext("VaR",4,outer = FALSE)

14.6 Using many cores for the calculations

The calculation of the backtests took a long time. Since every computer has multiple cores, we can run the calculations on all the cores at the same time. While that does not work of every type of calculation, the backtests are well suited for it.

Start by making a function that does the calculations for each day and returns of vector with the risk forecasts.

tmpfn=function(i){
    t1=i-WE
    t2=i-1
    data=sp500$y[t1:t2]
    res=vector(length=5)
    res[1]=Risk.HS(y=data,WE=WE,p=p,value=value,Print=FALSE)$risk$VaR
    res[2]=Risk.HS(y=data,WE=500,p=p,value=value,Print=FALSE)$risk$VaR
    res[3]=Risk.EWMA(y=data,WE=WE,p=p,value=value,Print=FALSE)$risk$VaR
    res[4]=Risk.GARCH(y=data,WE=WE,p=p,value=value,Print=FALSE)$risk$VaR
    res[5]=Risk.tGARCH(y=data,WE=WE,p=p,value=value,Print=FALSE)$risk$VaR
    return(res)
}
x=tmpfn(2000)

There are several ways we can use to execute multiple calculations in parallel in R. We start by loading the library parallel. The way forward depends whether we are on a Windows machine or a unix computer, like Linux or Mac. If on Windows, we need to use commands like do for, while it is easier to use mclapply under unix.

Just like in the example above, we loop over observations START:T. We also need to set the number of course we what do use for the calculation.

time1=Sys.time()
res=mclapply(START:T, FUN=tmpfn, mc.cores =8)
time2=Sys.time()
print(time2-time1)
Time difference of 1.879544 mins

This takes about 1/6 of the time the single core calculation above took, not 1/8, as there are overheads with multi core calculations.

mclapply returns a list, with one entry per day, each with five risk calculations.

length(res)
class(res)
res[1]
res[[1]]
3034
'list'
    1. 42.593
    2. 69.482
    3. 20.529
    4. 20.513
    5. 22.12
  1. 42.593
  2. 69.482
  3. 20.529
  4. 20.513
  5. 22.12

We can convert the list into a long vector, using unlist, where the first five elements are the first five risk estimates, etc., and then converting that into a matrix with five columns, and one row a day. After converting the matrix into a dataframe, we can then name the columns.

res.ul=unlist(res)
length(res.ul)
length(res[[1]])
res.m=matrix(unlist(res),ncol=length(res[[1]]),byrow=TRUE)
res.df=as.data.frame(res.m)
dim(res.df)
names(res.df)=c("VaR.HS","VaR.HS500","VaR.EWMA","VaR.GARCH","VaR.tGARCH")
head(res.df,2)
15170
5
  1. 3034
  2. 5
A data.frame: 2 × 5
VaR.HSVaR.HS500VaR.EWMAVaR.GARCHVaR.tGARCH
<dbl><dbl><dbl><dbl><dbl>
142.59369.48220.52920.51322.120
242.59369.48220.19419.82121.376

The first risk estimate is for day START, and we want to create a data frame with the same number of rows as the data, and we do that by filling it with rows containing only NA.

df = t(data.frame(rep(NA,length(res[[1]])))) 
df=as.data.frame(df[rep(seq_len(nrow(df)), each = START-1), ])
class(df)
names(df)=names(res.df)
res.df.full=rbind(df,res.df)
rownames(res.df.full)=NULL
print(rbind(head(res.df.full,1),tail(res.df.full,1)))
dim(res.df.full)
dim(sp500)
'data.frame'
     VaR.HS VaR.HS500 VaR.EWMA VaR.GARCH VaR.tGARCH
1        NA        NA       NA        NA         NA
5034 35.758    53.222   30.535    29.433     33.376
  1. 5034
  2. 5
  1. 5034
  2. 11

We then want to make a matrix with returns and the risk estimates, one that looks the same as what we did above. In this case, we do that by copying the first six columns of the sp500 into a new dataframe called sp500.parallel, and using cbind to append the risk columns to it.

sp500.parallel=sp500[,c("date","price","y","date.ts","y.ts","price.ts")]
sp500.parallel=cbind(sp500.parallel,res.df.full)
tail(sp500.parallel)
dim(sp500.parallel)
dim(sp500)
A data.frame: 6 × 11
datepriceydate.tsy.tsprice.tsVaR.HSVaR.HS500VaR.EWMAVaR.GARCHVaR.tGARCH
<int><dbl><dbl><date><zoo><zoo><dbl><dbl><dbl><dbl><dbl>
5030202212223822.39-0.0145571302022-12-22-0.0145571303822.3935.75853.22232.13129.00633.374
5031202212233844.82 0.0058509592022-12-23 0.0058509593844.8235.75853.22232.26730.27534.545
5032202212273829.25-0.0040578522022-12-27-0.0040578523829.2535.75853.22232.36527.60832.017
5033202212283783.22-0.0120934632022-12-28-0.0120934633783.2235.75853.22231.55625.10629.559
5034202212293849.28 0.0173106192022-12-29 0.0173106193849.2835.75853.22230.68226.04230.241
5035202212303839.50-0.0025439682022-12-30-0.0025439683839.5035.75853.22230.53529.43333.376
  1. 5034
  2. 11
  1. 5034
  2. 11

As a final assurance, we test to see if the two dataframes are identical.

identical(sp500,sp500.parallel)
TRUE