Appendix - Introduction (in R/MATLAB)


Copyright 2011 - 2019 Jon Danielsson. This code is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This code is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. The GNU General Public License is available at: https://www.gnu.org/licenses/.


Listing R.1: Entering and Printing Data in R
Last updated June 2018

x = 10   # assign x the value 10
print(x) # print x
		
Listing M.1: Entering and Printing Data
Last updated June 2018

x = 10; % assign x the value 10, silencing output print with ;
disp(x) % display x
		

Listing R.2: Vectors, Matrices and Sequences in R
Last updated June 2018

y = c(1,3,5,7,9)                   # create vector using c()
print(y)
print(y[3])                        # calling 3rd element (R indices start at 1)
print(dim(y))                      # gives NULL since y is a vector, not a matrix
print(length(y))                   # as expected, y has length 5
v = matrix(nrow=2,ncol=3)          # fill a 2 x 3 matrix with NaN values (default)
print(dim(v))                      # as expected, v is size (2,3)
w = matrix(c(1,2,3),nrow=6,ncol=3) # repeats matrix twice by rows, thrice by columns
print(w)
s = 1:10                           # s is a list of integers from 1 to 10 inclusive
print(s)
		
Listing M.2: Vectors, Matrices and Sequences
Last updated June 2018

y = [1,3,5,7,9]            % lists are denoted by square brackets
y(3)                       % calling 3rd element (MATLAB indices start at 1)
size(y)                    % shows that y is 1 x 5 (a row vector, by default)
length(y)                  % as expected, y has length 5
v = nan(2,3)               % fill a 2 x 3 matrix with NaN values
size(v)                    % as expected, v is size (2,3)
w = repmat([1,2,3]', 2, 3) % repeats matrix twice by rows, thrice by columns
s = 1:10                   % s is a list of integers from 1 to 10 inclusive
		

Listing R.3: Importing Data in R
Last updated June 2018

## There are many data sources for financial data, for instance
## Yahoo Finance, AlphaVantage and Quandl. However, some of the
## free data sources have numerous issues with accuracy and
## handling of missing data, so only CSV importing is shown here.
##
## For csv data, one can use read.csv to read it
##
## Example:
## data = read.csv('Ch1aprices.csv', header=TRUE, sep=',')
## one can use the zoo() function from the package zoo
## to turn the data into a timeseries (see Listing 1.1/1.2)
		
Listing M.3: Importing Data
Last updated June 2018

%% There are many data sources for financial data, for instance
%% Yahoo Finance, AlphaVantage and Quandl. However, some of the
%% free data sources have numerous issues with accuracy and
%% handling of missing data, so only CSV importing is shown here.
%%
%% For csv data, one can use csvread to read it
%%
%% Example:
%% data = csvread('data.csv', 1, 0);
%% the two numbers behind are the row offset and column offset
%% so here we ignore the first row (ie. the header)
		

Listing R.4: Basic Summary Statistics in R
Last updated June 2018

y=matrix(c(3.1,4.15,9))
sum(y)                  # sum of all elements of y
prod(y)                 # product of all elements of y
max(y)                  # maximum value of y
min(y)                  # minimum value of y
range(y)                # min, max value of y
mean(y)                 # arithmetic mean
median(y)               # median
var(y)                  # variance
cov(y)                  # covar matrix = variance for single vector
cor(y)                  # corr matrix = [1] for single vector
sort(y)                 # sorting in ascending order
log(y)                  # natural log
		
Listing M.4: Basic Summary Statistics
Last updated June 2018

y = [3.14,15,9.26,5];
sum(y)                % sum of all elements of y
prod(y)               % product of all elements of y
max(y)                % maximum value of y
min(y)                % minimum value of y
range(y)              % min, max value of y
mean(y)               % arithmetic mean
median(y)             % median
var(y)                % variance
cov(y)                % covar matrix = variance for single vector
corrcoef(y)           % corr matrix = [1] for single vector
sort(y)               % sorting in ascending order
log(y)                % natural log
		

Listing R.5: Calculating Moments in R
Last updated June 2018

library(moments)
mean(y)          # mean
var(y)           # variance
sd(y)            # unbiased standard deviation, by default
skewness(y)      # skewness
kurtosis(y)      # kurtosis
		
Listing M.5: Calculating Moments
Last updated June 2018

mean(y)     % mean
var(y)      % variance
std(y)      % unbiased standard deviation, by default
skewness(y) % skewness
kurtosis(y) % kurtosis
		

Listing R.6: Basic Matrix Operations in R
Last updated June 2018

z = matrix(c(1,2,3,4),2,2) # z is a 2 x 2 matrix
x = matrix(c(1,2),1,2)     # x is a 1 x 2 matrix
## Note: z * x is undefined since the two matrices are not conformable
z %*% t(x)                 # this evaluates to a 2 x 1 matrix
rbind(z,x)                 # "stacking" z and x vertically
cbind(z,t(x))              # "stacking z and x' horizontally
## Note: dimensions must match along the combining axis
		
Listing M.6: Basic Matrix Operations
Last updated June 2018

z = [1, 2; 3, 4] % z is a 2 x 2 matrix (Note the use of ; as row separator)
x = [1, 2]       % x is a 1 x 2 matrix
%% Note: z * x is undefined since the two matrices are not conformable
z * x'           % this evaluates to a 2 x 1 matrix
vertcat(z,x)     % "stacking" z and x vertically
horzcat(z,x')    % "stacking z and x' horizontally
%% Note: dimensions must match along the combining axis)
		

Listing R.7: Statistical Distributions in R
Last updated June 2018

q = seq(from = -3, to = 3, length = 7)    # specify a set of values
p = seq(from = 0.1, to = 0.9, length = 9) # specify a set of probabilities
qnorm(p, mean = 0, sd = 1)                # element-wise inverse Normal quantile
pt(q, df = 4)                             # element-wise cdf under Student-t(4)
dchisq(q, df = 2)                         # element-wise pdf under Chisq(2)
## Similar syntax for other distributions
## q for quantile, p for cdf, d for pdf
## followed by the abbreviation of the distribution
## One can also obtain pseudorandom samples from distributions
x = rt(100, df = 5)                       # Sampling 100 times from TDist with 5 df
y = rnorm(50, mean = 0, sd = 1)           # Sampling 50 times from a standard normal
## Given data, we obtain MLE estimates of distribution parameters with package MASS:
library(MASS)
res = fitdistr(x, densfun = "normal")     # Fitting x to normal dist
print(res)
		
Listing M.7: Statistical Distributions
Last updated June 2018

q = -3:1:3                 % specify a set of values
p = 0.1:0.1:0.9            % specify a set of probabilities
norminv(p, 0, 1)           % element-wise inverse Normal quantile
tcdf(q, 4)                 % element-wise cdf under Student-t(4)
chi2pdf(q, 2)              % element-wise pdf under Chisq(2)
%% One can also obtain pseudorandom samples from distributions
x = trnd(5, 100, 1);       % Sampling 100 times from t dist with 5 df
y = normrnd(0, 1, 100, 1); % Sampling 50 times from a standard normal
%% Given sample data, we can also obtain MLE estimates of distribution parameters:
res = fitdist(x, "Normal") % Fitting x to normal dist
		

Listing R.8: Statistical Tests in R
Last updated June 2018

library(tseries)
x = rt(500, df = 5)                          # Create hypothetical dataset x
jarque.bera.test(x)                          # Jarque-Bera test for normality
Box.test(x, lag = 20, type = c("Ljung-Box")) # Ljung-Box test for serial correlation
		
Listing M.8: Statistical Tests
Last updated June 2018

x = trnd(5, 500, 1);                    % Create hypothetical dataset x
[h1, p1, jbstat] = jbtest(x)            % Jarque-Bera test for normality
[h2, p2, lbstat] = lbqtest(x,'lags',20) % Ljung-Box test for serial correlation
		

Listing R.9: Time Series in R
Last updated June 2018

x = rt(60, df = 5)         # Create hypothetical dataset x
par(mfrow=c(1,2), pty='s')
acf(x,20)                  # autocorrelation for lags 1:20
pacf(x,20)                 # partial autocorrelation for lags 1:20
		
Listing M.9: Time Series
Last updated June 2018

x = trnd(5, 60, 1); % Create hypothetical dataset x
subplot(1,2,1)
autocorr(x, 20)     % autocorrelation for lags 1:20
subplot(1,2,2)
parcorr(x,20)       % partial autocorrelation for lags 1:20
		

Listing R.10: Loops and Functions in R
Last updated June 2018

## For loops
for (i in 3:7)                             # iterates through [3,4,5,6,7]
    print(i^2)
## If-else loops
X = 10
if (X %% 3 == 0) {
    print("X is a multiple of 3")
} else {
    print("X is not a multiple of 3")
}
## Functions (example: a simple excess kurtosis function)
excess_kurtosis = function(x, excess = 3){ # note: excess optional, default=3
    m4 = mean((x-mean(x))^4)
    excess_kurt = m4/(sd(x)^4) - excess
    excess_kurt
}
x = rt(60, df = 5)                         # Create hypothetical dataset x
excess_kurtosis(x)
		
Listing M.10: Loops and Functions
Last updated June 2018

%% For loops
for i = 3:7                          % iterates through [3,4,5,6,7]
    i^2
end
%% If-else loops
X = 10;
if (rem(X,3) == 0)
    disp("X is a multiple of 3")
else
    disp("X is not a multiple of 3")
end
%% Functions (example: a simple excess kurtosis function)
%% NOTE: in MATLAB, functions can be defined in 2 locations:
%% 1) in a separate file (e.g. excess_kurtosis.m in this case) in the workspace
%% 2) in the same file as the rest of the code, BUT at the end of the file
%% function k = excess_kurtosis(x, excess)
%%     if nargin == 1                % if there is only 1 argument
%%         excess = 3;               % set excess = 3
%%     end                           % this is how optional param excess is set
%%     m4 = mean((x-mean(x)).^4);
%%     k = m4/(std(x)^4) - excess;
%% end
		

Listing R.11: Basic Graphs in R
Last updated June 2018

y = rnorm(50, mean = 0, sd = 1)
par(mfrow=c(2,2))               # sets up space for subplots
barplot(y)                      # bar plot
plot(y,type='l')                # line plot
hist(y)                         # histogram
plot(y)                         # scatter plot
		
Listing M.11: Basic Graphs
Last updated June 2018

y = normrnd(0, 1, 50, 1);
z = trnd(4, 50, 1);
subplot(2,2,1)
bar(y)                    % bar plot
subplot(2,2,2)
plot(y)                   % line plot
subplot(2,2,3)
histogram(y)              % histogram
subplot(2,2,4)
scatter(y,z)              % scatter plot
		

Listing R.12: Miscellaneous Useful Functions in R
Last updated June 2018

## Convert objects from one type to another with as.integer() etc
## To check type, use typeof(object)
x = 8.0
print(typeof(x))
x = as.integer(x)
print(typeof(x))
		
Listing M.12: Miscellaneous Useful Functions
Last updated June 2018

%% Convert objects from one type to another with int8() etc
%% To check type, use isfloat(object), isinteger(object) and so on
x = 8.0;
isfloat(x)
x = int8(x);
isinteger(x)