14 Backtesting

14.1 Libraries

source('functions.r')
suppressPackageStartupMessages(library(rugarch))
suppressPackageStartupMessages(library(lubridate))
suppressPackageStartupMessages(library(zoo))
library(parallel)

14.2 Data

load('data/data.ts.RData')
sp500=data.ts$sp500
Ticker=data.ts$Ticker
names(data)
names(sp500)

'Return'
'Price'
'UnAdjustedPrice'
'sp500'
'sp500tr'
'Ticker'
'Price.ts'
'Return.ts'
'UnAdjustedPrice.ts'

'date'
'price'
'y'
'date.ts'
'y.ts'
'price.ts'

14.3 Setup

Specify the VaR probability, portfolio value, estimation window size and portfolio weight:

p <- 0.01
value = 1000
WE = 2000
T=length(sp500$y)
WT=T-WE
w = rep(1/length(Ticker),length(Ticker))
cat("p=",p,", value=",value,", WE=",WE,"T=",T,"WT=",WT,", w:",w,"\n")

p= 0.01 , value= 1000 , WE= 2000 T= 5034 WT= 3034 , w: 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667

14.4 Forecast VaR

14.5 Historical simulation — HS

DEBUG=FALSE

violations=rep(NA,length=T)
VaR=rep(NA,length=T)
for(i in (WE+1):T){
    t1=i-WE
    t2=i-1
    data=sp500$y[t1:t2]
    if(DEBUG) cat(i,t1,t2,length(data),"\n")
    VaR[i]=Risk.HS(y=data,WE=WE,p=p,value=value,Print=FALSE)$risk$VaR

}
plot(VaR,type='s')

If we want to do this for several methods it is better to do is all in the same loop as that minimises the chance of mistakes. This will take a long time, and we print the estimation time at the end.

DEBUG=FALSE
START=WE+1

time1=Sys.time()
sp500$VaR.HS=rep(NA,length=T)
sp500$VaR.HS500=rep(NA,length=T)

sp500$VaR.EWMA=rep(NA,length=T)
sp500$VaR.GARCH=rep(NA,length=T)
sp500$VaR.tGARCH=rep(NA,length=T)

for(i in START:T){
    t1=i-WE
    t2=i-1
    data=sp500$y[t1:t2]
    if(DEBUG) cat(i,t1,t2,"/",T,length(data),"\n")
    sp500$VaR.HS[i]=Risk.HS(y=data,WE=WE,p=p,value=value,Print=FALSE)$risk$VaR
    sp500$VaR.HS500[i]=Risk.HS(y=data,WE=500,p=p,value=value,Print=FALSE)$risk$VaR
    sp500$VaR.EWMA[i]=Risk.EWMA(y=data,WE=WE,p=p,value=value,Print=FALSE)$risk$VaR
    sp500$VaR.GARCH[i]=Risk.GARCH(y=data,WE=WE,p=p,value=value,Print=FALSE)$risk$VaR
    sp500$VaR.tGARCH[i]=Risk.tGARCH(y=data,p=p,WE=WE,value=value,Print=FALSE)$risk$VaR
}
time2=Sys.time()

This will take quite some time to calculate. We could speed it up in several ways. Use multi-core calculations, and perhaps use our own algo, and feed into it as starting values the result from the previous days optimisation.

print(time2-time1)

Time difference of 11.60907 mins

At the end of this Chapter we show how multi-core calculations can be used to make it much faster.

14.5.1 Presenting results

Some columns in sp500 have VaR in the name. We want to plot those, and can manually put those into the plot command. But it is safer to search for those columns and plot them

print(head(sp500,2))
print(names(sp500))
VaR=grep("VaR",names(sp500))
print(VaR)
print(tail(sp500[,VaR],2))

      date  price             y    date.ts          y.ts price.ts VaR.HS
2 20030103 908.59 -0.0004841496 2003-01-03 -0.0004841496   908.59     NA
3 20030106 929.01  0.0222255557 2003-01-06  0.0222255557   929.01     NA
  VaR.HS500 VaR.EWMA VaR.GARCH VaR.tGARCH
2        NA       NA        NA         NA
3        NA       NA        NA         NA

 [1] "date"       "price"      "y"          "date.ts"    "y.ts"      
 [6] "price.ts"   "VaR.HS"     "VaR.HS500"  "VaR.EWMA"   "VaR.GARCH" 
[11] "VaR.tGARCH"

[1]  7  8  9 10 11

     VaR.HS VaR.HS500 VaR.EWMA VaR.GARCH VaR.tGARCH
5034 35.758    53.222   30.682    26.042     30.241
5035 35.758    53.222   30.535    29.433     33.376

We can then plot the VaR columns.

print(head(sp500))
plot(sp500$date.ts,sp500$VaR.EWMA)

      date  price             y    date.ts          y.ts price.ts VaR.HS
2 20030103 908.59 -0.0004841496 2003-01-03 -0.0004841496   908.59     NA
3 20030106 929.01  0.0222255557 2003-01-06  0.0222255557   929.01     NA
4 20030107 922.93 -0.0065661110 2003-01-07 -0.0065661110   922.93     NA
5 20030108 909.93 -0.0141857185 2003-01-08 -0.0141857185   909.93     NA
6 20030109 927.57  0.0192005899 2003-01-09  0.0192005899   927.57     NA
7 20030110 927.57  0.0000000000 2003-01-10  0.0000000000   927.57     NA
  VaR.HS500 VaR.EWMA VaR.GARCH VaR.tGARCH
2        NA       NA        NA         NA
3        NA       NA        NA         NA
4        NA       NA        NA         NA
5        NA       NA        NA         NA
6        NA       NA        NA         NA
7        NA       NA        NA         NA

par(mar=c(3,3,1,4),mfrow=c(1,1))


matplot(sp500$date.ts,
    sp500[,VaR],
    lty=1,
    type='l',
    col=c("blue","red"),
    lwd=c(1,2),
    las=1,
    ylab="returns",
    yaxt='n',
    bty='l'
)
w=pretty(sp500$VaR.HS)

axis(2,at=w,label=paste0("$",w),las=1)

violations=rep(NA,length=T)

eta= sp500$y < -(sp500$VaR.HS/value)
eta=eta[!is.na(eta)]
cat(WT,length(eta),sum(eta),sum(eta)/(WT*p),"\n")
plot(sp500$VaR.HS,type='s',bty='l',las=1,col="red")

3034 3034 35 1.153593

We can superimpose the VaR onto the returns

par(mar=c(3,3,1,4))
matplot(
    cbind(sp500$y,-sp500$VaR.HS/value),
    lty=1,
    type='l',
    col=c("black","red"),
    lwd=c(1,2),
    las=1,
    ylab="returns",
    yaxt='n',
    bty='u'
)
w=pretty(sp500$y,n=10)
axis(2,at=w,label=paste0(100*w,"%"),las=1)
w=w[w<0]
axis(4,at=w,label=paste0("$",-value*w),las=1)

14.5.2 Zoom into Covid

We can zoom into the Covid-19 period in the first part of 2020. We filtered to the data by not a number (NA) and also create a column,dot, the days when VaR was violated so we can label the plot with red dots below.

res=sp500
res$VaR = -res$VaR.HS/value
res=res[!is.na(res$VaR),]

res$dot=NA
res$dot[res$y < res$VaR] = res$y[ res$y < res$VaR]

There are two main ways we can do the filter by date, either by converting the data into zoo object or simply directly by using the date range. We follow the second way below, but both would work. Note that the code would be different depending on the way we chose.

The zoo and window way.

res=zoo(res,order.by=res$date.ts)
res=window(res,start=ymd(20200201),end=ymd(20200701))

res=res[res$date.ts>=ymd(20200101) &res$date.ts<=ymd(20200701), ]

We then made the main plot.

par(mar=c(3,4,1,6))
matplot(res$date.ts,res[,c("y","VaR")],
     main="Risk in Covid (HS VaR)",
    lty=1,
    type='l',
    col=c("black","red"),
    lwd=c(1,2),
    las=1,
    ylab="returns",
    yaxt='n'
)

points(res$date.ts,res$dot,pch=16,col="red",cex=1.5)

w=pretty(res$y,n=5)
axis(2,at=w,label=paste0(value*w,"%"),las=1)
w=w[w<0]
axis(4,at=w,label=paste0("$",-value*w),las=1)
mtext("return",2,outer = TRUE)
mtext("VaR",4,outer = FALSE)

14.6 Using many cores for the calculations

The calculation of the backtests took a long time. Since every computer has multiple cores, we can run the calculations on all the cores at the same time. While that does not work of every type of calculation, the backtests are well suited for it.

Start by making a function that does the calculations for each day and returns of vector with the risk forecasts.

tmpfn=function(i){
    t1=i-WE
    t2=i-1
    data=sp500$y[t1:t2]
    res=vector(length=5)
    res[1]=Risk.HS(y=data,WE=WE,p=p,value=value,Print=FALSE)$risk$VaR
    res[2]=Risk.HS(y=data,WE=500,p=p,value=value,Print=FALSE)$risk$VaR
    res[3]=Risk.EWMA(y=data,WE=WE,p=p,value=value,Print=FALSE)$risk$VaR
    res[4]=Risk.GARCH(y=data,WE=WE,p=p,value=value,Print=FALSE)$risk$VaR
    res[5]=Risk.tGARCH(y=data,WE=WE,p=p,value=value,Print=FALSE)$risk$VaR
    return(res)
}
x=tmpfn(2000)

There are several ways we can use to execute multiple calculations in parallel in R. We start by loading the library parallel. The way forward depends whether we are on a Windows machine or a unix computer, like Linux or Mac. If on Windows, we need to use commands like do for, while it is easier to use mclapply under unix.

Just like in the example above, we loop over observations START:T. We also need to set the number of course we what do use for the calculation.

time1=Sys.time()
res=mclapply(START:T, FUN=tmpfn, mc.cores =8)
time2=Sys.time()
print(time2-time1)

Time difference of 1.879544 mins

This takes about 1/6 of the time the single core calculation above took, not 1/8, as there are overheads with multi core calculations.

mclapply returns a list, with one entry per day, each with five risk calculations.

length(res)
class(res)
res[1]
res[[1]]

3034

'list'

1. 42.593
2. 69.482
3. 20.529
4. 20.513
5. 22.12

42.593
69.482
20.529
20.513
22.12

We can convert the list into a long vector, using unlist, where the first five elements are the first five risk estimates, etc., and then converting that into a matrix with five columns, and one row a day. After converting the matrix into a dataframe, we can then name the columns.

res.ul=unlist(res)
length(res.ul)
length(res[[1]])
res.m=matrix(unlist(res),ncol=length(res[[1]]),byrow=TRUE)
res.df=as.data.frame(res.m)
dim(res.df)
names(res.df)=c("VaR.HS","VaR.HS500","VaR.EWMA","VaR.GARCH","VaR.tGARCH")
head(res.df,2)

15170

3034
5

A data.frame: 2 × 5
	VaR.HS	VaR.HS500	VaR.EWMA	VaR.GARCH	VaR.tGARCH
	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>
1	42.593	69.482	20.529	20.513	22.120
2	42.593	69.482	20.194	19.821	21.376

The first risk estimate is for day START, and we want to create a data frame with the same number of rows as the data, and we do that by filling it with rows containing only NA.

df = t(data.frame(rep(NA,length(res[[1]])))) 
df=as.data.frame(df[rep(seq_len(nrow(df)), each = START-1), ])
class(df)
names(df)=names(res.df)
res.df.full=rbind(df,res.df)
rownames(res.df.full)=NULL
print(rbind(head(res.df.full,1),tail(res.df.full,1)))
dim(res.df.full)
dim(sp500)

'data.frame'

     VaR.HS VaR.HS500 VaR.EWMA VaR.GARCH VaR.tGARCH
1        NA        NA       NA        NA         NA
5034 35.758    53.222   30.535    29.433     33.376

5034
5

5034
11

We then want to make a matrix with returns and the risk estimates, one that looks the same as what we did above. In this case, we do that by copying the first six columns of the sp500 into a new dataframe called sp500.parallel, and using cbind to append the risk columns to it.

sp500.parallel=sp500[,c("date","price","y","date.ts","y.ts","price.ts")]
sp500.parallel=cbind(sp500.parallel,res.df.full)
tail(sp500.parallel)
dim(sp500.parallel)
dim(sp500)

A data.frame: 6 × 11
	date	price	y	date.ts	y.ts	price.ts	VaR.HS	VaR.HS500	VaR.EWMA	VaR.GARCH	VaR.tGARCH
	<int>	<dbl>	<dbl>	<date>	<zoo>	<zoo>	<dbl>	<dbl>	<dbl>	<dbl>	<dbl>
5030	20221222	3822.39	-0.014557130	2022-12-22	-0.014557130	3822.39	35.758	53.222	32.131	29.006	33.374
5031	20221223	3844.82	0.005850959	2022-12-23	0.005850959	3844.82	35.758	53.222	32.267	30.275	34.545
5032	20221227	3829.25	-0.004057852	2022-12-27	-0.004057852	3829.25	35.758	53.222	32.365	27.608	32.017
5033	20221228	3783.22	-0.012093463	2022-12-28	-0.012093463	3783.22	35.758	53.222	31.556	25.106	29.559
5034	20221229	3849.28	0.017310619	2022-12-29	0.017310619	3849.28	35.758	53.222	30.682	26.042	30.241
5035	20221230	3839.50	-0.002543968	2022-12-30	-0.002543968	3839.50	35.758	53.222	30.535	29.433	33.376

5034
11

5034
11

As a final assurance, we test to see if the two dataframes are identical.

identical(sp500,sp500.parallel)

TRUE