Appendix - Introduction (in MATLAB/Python)


Copyright 2011 - 2019 Jon Danielsson. This code is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This code is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. The GNU General Public License is available at: https://www.gnu.org/licenses/.


Listing M.1: Entering and Printing Data
Last updated June 2018

%% Assign a scalar and display it
x = 10; % assign x the value 10; the trailing ; silences the output print
disp(x) % display x
		
Listing P.1: Entering and Printing Data in Python
Last updated June 2018

## Assign a scalar and print it
x = 10   # assign x the value 10
print(x) # print the value of x
		

Listing M.2: Vectors, Matrices and Sequences
Last updated June 2018

y = [1,3,5,7,9]            % square brackets build a row vector
y(3)                       % calling 3rd element (MATLAB indices start at 1)
size(y)                    % shows that y is 1 x 5 (a row vector, by default)
length(y)                  % as expected, y has length 5
v = nan(2,3)               % fill a 2 x 3 matrix with NaN values
size(v)                    % as expected, v is size (2,3)
w = repmat([1,2,3]', 2, 3) % tiles the 3 x 1 column twice by rows, thrice by columns (6 x 3)
s = 1:10                   % s is a row vector of the integers from 1 to 10 inclusive
		
Listing P.2: Vectors, Matrices and Sequences in Python
Last updated June 2018

import numpy as np
y = [1,3,5,7,9]                        # square brackets create a Python list
print(y)
print(y[2])                            # 3rd element (Python indices start at 0)
print(len(y))                          # as expected, y has length 5
v = np.full([2,3], np.nan)             # create a 2x3 matrix with NaN values
print(v)
print(v.shape)                         # as expected, v is size (2,3)
## np.transpose() leaves a 1-D list unchanged, so build an explicit 3 x 1
## column first; reps = (2,3) then tiles it twice by rows, thrice by columns
w = np.tile(np.array([[1,2,3]]).T, (2,3))
print(w)                               # w is 6 x 3, like repmat([1,2,3]', 2, 3)
s = range(10)                          # a lazy sequence of the integers 0 to 9
print([x for x in s])                  # return  elements using list comprehension
		

Listing M.3: Importing Data
Last updated June 2018

%% There are many data sources for financial data, for instance
%% Yahoo Finance, AlphaVantage and Quandl. However, some of the
%% free data sources have numerous issues with accuracy and
%% handling of missing data, so only CSV importing is shown here.
%%
%% For csv data, one can use csvread to read it
%%
%% Example:
%% data = csvread('data.csv', 1, 0);
%% the two trailing arguments are the row offset and the column offset,
%% so here we skip the first row (i.e. the header)
		
Listing P.3: Importing Data in Python
Last updated June 2018

## There are many data sources for financial data, for instance
## Yahoo Finance, AlphaVantage and Quandl. However, some of the
## free data sources have numerous issues with accuracy and
## handling of missing data, so only CSV importing is shown here.
##
## For csv data, one can use numpy.loadtxt() to read it
##
## Example:
## import numpy as np
## data = np.loadtxt('data.csv', delimiter = ',', skiprows = 1)
## skiprows=1 ensures that the header row is skipped
		

Listing M.4: Basic Summary Statistics
Last updated June 2018

y = [3.14,15,9.26,5];
sum(y)                % sum of all elements of y
prod(y)               % product of all elements of y
max(y)                % maximum value of y
min(y)                % minimum value of y
range(y)              % range of y, i.e. max(y) - min(y)
mean(y)               % arithmetic mean
median(y)             % median
var(y)                % variance
cov(y)                % covar matrix = variance for single vector
corrcoef(y)           % corr matrix = [1] for single vector
sort(y)               % sorting in ascending order
log(y)                % natural log
		
Listing P.4: Basic Summary Statistics in Python
Last updated June 2018

import numpy as np
y = [3.14,15,9.26,5]
print(sum(y))              # sum of all elements of y
print(np.prod(y))          # product of all elements of y
print(max(y))              # maximum value of y
print(min(y))              # minimum value of y
print(max(y) - min(y))     # range of y, i.e. max - min
print(np.mean(y))          # arithmetic mean
print(np.median(y))        # median
print(np.var(y, ddof = 1)) # unbiased variance (np.var defaults to ddof = 0)
print(np.cov(y))           # covar matrix = variance for single vector
print(np.corrcoef(y))      # corr matrix = [1] for single vector
print(np.sort(y))          # sort in ascending order
print(np.log(y))           # natural log
		

Listing M.5: Calculating Moments
Last updated June 2018

%% y is the data vector defined in Listing M.4
mean(y)     % mean
var(y)      % variance
std(y)      % unbiased standard deviation, by default
skewness(y) % skewness
kurtosis(y) % kurtosis
		
Listing P.5: Calculating Moments in Python
Last updated June 2018

import numpy as np
from scipy import stats
y = [3.14,15,9.26,5]                     # same data as in Listing P.4
print(np.mean(y))                        # mean
print(np.var(y, ddof = 1))               # ddof = 1 for unbiased variance
print(np.std(y, ddof = 1))               # ddof = 1 for unbiased standard deviation
print(stats.skew(y))                     # skewness
print(stats.kurtosis(y, fisher = False)) # fisher = False gives Pearson definition
		

Listing M.6: Basic Matrix Operations
Last updated June 2018

z = [1, 2; 3, 4] % z is a 2 x 2 matrix (Note the use of ; as row separator)
x = [1, 2]       % x is a 1 x 2 matrix
%% Note: z * x is undefined since the two matrices are not conformable
z * x'           % this evaluates to a 2 x 1 matrix
vertcat(z,x)     % "stacking" z and x vertically
horzcat(z,x')    % "stacking" z and x' horizontally
%% Note: dimensions must match along the combining axis
		
Listing P.6: Basic Matrix Operations in Python
Last updated June 2018

import numpy as np
## 2-D arrays with the @ operator are used here rather than np.matrix,
## which NumPy no longer recommends
z = np.array([[1, 2], [3, 4]])                    # z is a 2 x 2 array
x = np.array([[1, 2]])                            # x is a 1 x 2 array (note the double brackets)
## Note: z @ x is undefined since the two matrices are not conformable
print(z @ x.T)                                    # matrix product, evaluates to a 2 x 1 array
b = np.concatenate((z, x), axis = 0)              # "stacking" z and x vertically
print(b)
c = np.concatenate((z, x.T), axis = 1)            # "stacking" z and x' horizontally
print(c)
## note: dimensions must match along the combining axis
		

Listing M.7: Statistical Distributions
Last updated June 2018

q = -3:1:3                 % specify a set of values
p = 0.1:0.1:0.9            % specify a set of probabilities
norminv(p, 0, 1)           % element-wise inverse Normal quantile
tcdf(q, 4)                 % element-wise cdf under Student-t(4)
chi2pdf(q, 2)              % element-wise pdf under Chisq(2)
%% One can also obtain pseudorandom samples from distributions
x = trnd(5, 100, 1);       % Sampling 100 times from t dist with 5 df
y = normrnd(0, 1, 50, 1);  % Sampling 50 times from a standard normal
%% Given sample data, we can also obtain MLE estimates of distribution parameters:
res = fitdist(x, "Normal") % Fitting x to normal dist
		
Listing P.7: Statistical Distributions in Python
Last updated June 2018

import numpy as np
from scipy import stats
q = np.arange(-3, 4)                          # grid of values -3, -2, ..., 3 (step defaults to 1)
p = np.arange(0.1, 1.0, 0.1)                  # grid of probabilities in steps of 0.1
print(stats.norm.ppf(p, loc=0, scale=1))      # standard Normal quantiles, element by element
print(stats.t.cdf(q, df=4))                   # Student-t(4) cdf, element by element
print(stats.chi2.pdf(q, df=2))                # Chisq(2) pdf, element by element
## numpy.random supplies pseudorandom draws from many distributions
x = np.random.standard_t(5, 100)              # 100 draws from a t distribution with 5 df
y = np.random.normal(0.0, 1.0, 50)            # 50 draws from a standard normal
## stats.<dist>.fit returns maximum likelihood estimates of the parameters
res = stats.norm.fit(x)                       # fit a normal distribution to x
print(res)
		

Listing M.8: Statistical Tests
Last updated June 2018

x = trnd(5, 500, 1);                    % Create hypothetical dataset x (500 draws, t dist with 5 df)
[h1, p1, jbstat] = jbtest(x)            % Jarque-Bera test for normality
[h2, p2, lbstat] = lbqtest(x,'lags',20) % Ljung-Box test for serial correlation, 20 lags
		
Listing P.8: Statistical Tests in Python
Last updated June 2018

import numpy as np                                      # needed for np.random below
from scipy import stats
from statsmodels.stats.diagnostic import acorr_ljungbox
x = np.random.standard_t(df=5, size=500)                # Create dataset x
print(stats.jarque_bera(x))                             # Jarque-Bera test
print(acorr_ljungbox(x, lags=20))                       # Ljung-Box test
		

Listing M.9: Time Series
Last updated June 2018

x = trnd(5, 60, 1); % Create hypothetical dataset x
subplot(1,2,1)      % first panel of a 1 x 2 grid
autocorr(x, 20)     % autocorrelation for lags 1:20
subplot(1,2,2)      % second panel of the 1 x 2 grid
parcorr(x,20)       % partial autocorrelation for lags 1:20
		
Listing P.9: Time Series in Python
Last updated June 2018

import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
def _plot_from_lag_one(coefs):
    """Bar-plot coefs[1:] against positions 1, 2, ..., then show and close the figure."""
    lags = np.arange(1, len(coefs))
    plt.bar(x = lags, height = coefs[1:])   # element 0 is left out of the plot
    plt.show()
    plt.close()

y = np.random.standard_t(df = 5, size = 60)        # hypothetical dataset y
q1 = sm.tsa.stattools.acf(y, nlags=20)             # autocorrelation, plotted for lags 1:20
_plot_from_lag_one(q1)
q2 = sm.tsa.stattools.pacf(y, nlags=20)            # partial autocorr, plotted for lags 1:20
_plot_from_lag_one(q2)
		

Listing M.10: Loops and Functions
Last updated June 2018

%% For loops
for i = 3:7                          % iterates through [3,4,5,6,7]
    i^2
end
%% If-else statements
X = 10;
if (rem(X,3) == 0)
    disp("X is a multiple of 3")
else
    disp("X is not a multiple of 3")
end
%% Functions (example: a simple excess kurtosis function)
%% NOTE: in MATLAB, functions can be defined in 2 locations:
%% 1) in a separate file (e.g. excess_kurtosis.m in this case) in the workspace
%% 2) in the same file as the rest of the code, BUT at the end of the file
%% function k = excess_kurtosis(x, excess)
%%     if nargin == 1                % if there is only 1 argument
%%         excess = 3;               % set excess = 3
%%     end                           % this is how optional param excess is set
%%     m4 = mean((x-mean(x)).^4);
%%     k = m4/(std(x)^4) - excess;
%% end
		
Listing P.10: Loops and Functions in Python
Last updated June 2018

import numpy as np
## For loops
for i in range(3,8):                     # NOTE: range(start, end), end excluded
    print(i**2)                          # range(3,8) iterates through [3,4,5,6,7]
## If-else statements
X = 10
if X % 3 == 0:
    print("X is a multiple of 3")
else:
    print("X is not a multiple of 3")
## Functions (example: a simple excess kurtosis function)
def excess_kurtosis(x, excess = 3):
    """Return the fourth standardised moment of x minus `excess`.

    x      : numeric data (e.g. a NumPy array)
    excess : amount subtracted from the kurtosis; optional, default = 3

    The fourth central moment is divided by np.std(x) raised to the
    fourth power, and `excess` is then subtracted from the ratio.
    """
    deviations = x - np.mean(x)
    fourth_moment = np.mean(deviations**4)   # note: exponentiation in Python uses **
    sigma = np.std(x)
    return fourth_moment / (sigma**4) - excess
x = np.random.standard_t(df=5,size=60)   # Create hypothetical dataset x
print(excess_kurtosis(x))                # excess is omitted, so the default of 3 is used
		

Listing M.11: Basic Graphs
Last updated June 2018

y = normrnd(0, 1, 50, 1); % 50 draws from a standard normal
z = trnd(4, 50, 1);       % 50 draws from a t dist with 4 df
subplot(2,2,1)            % the four plots share a 2 x 2 grid
bar(y)                    % bar plot
subplot(2,2,2)
plot(y)                   % line plot
subplot(2,2,3)
histogram(y)              % histogram
subplot(2,2,4)
scatter(y,z)              % scatter plot
		
Listing P.11: Basic Graphs in Python
Last updated June 2018

import numpy as np
import matplotlib.pyplot as plt
y = np.random.normal(size = 50)
z = np.random.standard_t(df = 4, size = 50)
## using Matplotlib to plot bar, line, histogram and scatter plots
plt.subplot(2,2,1)
plt.bar(range(len(y)), y)    # bar plot
plt.subplot(2,2,2)
plt.plot(y)                  # line plot
plt.subplot(2,2,3)
plt.hist(y)                  # histogram
plt.subplot(2,2,4)
plt.scatter(y,z)             # scatter plot
plt.show()                   # display the figure (required when run as a script)
plt.close()
		

Listing M.12: Miscellaneous Useful Functions
Last updated June 2018

%% Convert objects from one type to another with int8() etc
%% To check type, use isfloat(object), isinteger(object) and so on
x = 8.0;
isfloat(x)    % check whether x is a floating-point value
x = int8(x);  % convert x with int8()
isinteger(x)  % check whether x is now an integer type
		
Listing P.12: Miscellaneous Useful Functions in Python
Last updated June 2018

## Convert objects from one type to another with int(), float() etc
## To check type, use type(object)
x = 8.0
print(type(x))  # <class 'float'>
x = int(x)      # convert the float to an integer
print(type(x))  # <class 'int'>