Copyright 2011 - 2019 Jon Danielsson. This code is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This code is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. The GNU General Public License is available at: https://www.gnu.org/licenses/.

Last updated June 2018

```
x = 10 # bind the value 10 to the name x
print(x) # display the value currently bound to x
```

Last updated June 2018

```
# Bind the integer 10 to the name x, then echo it to standard output.
x = 10
print(x)
```

Last updated June 2018

```
## Vectors, matrices and integer sequences
y <- c(1, 3, 5, 7, 9) # c() combines its arguments into a vector
print(y)
print(y[3]) # third element: R indexing is 1-based
print(dim(y)) # NULL, because a plain vector carries no dim attribute
print(length(y)) # 5 elements
v <- matrix(NA, nrow = 2, ncol = 3) # 2 x 3 matrix of NA, the value matrix() fills with by default
print(dim(v)) # 2 3
w <- matrix(rep(c(1, 2, 3), times = 6), nrow = 6, ncol = 3) # 1,2,3 repeated to fill a 6 x 3 matrix column by column
print(w)
s <- seq_len(10) # integer vector 1, 2, ..., 10
print(s)
```

Last updated June 2018

```
import numpy as np

# Lists, NumPy arrays and ranges.
y = [1, 3, 5, 7, 9]  # square brackets create a built-in Python list
print(y)
print(y[2])  # 3rd element (Python indices start at 0)
print(len(y))  # as expected, y has length 5

v = np.full([2, 3], np.nan)  # create a 2x3 matrix with NaN values
print(v)
print(v.shape)  # as expected, v is size (2,3)

# np.transpose() leaves a 1-D sequence unchanged, so the column vector is
# written out explicitly; reps=(2, 3) then repeats it twice down the rows
# and three times across the columns, giving a 6 x 3 matrix.
w = np.tile(np.array([[1], [2], [3]]), (2, 3))
print(w)

s = range(10)  # lazy sequence of the integers 0 to 9
print(list(s))  # materialise the elements as a list
```

Last updated June 2018

```
## There are many data sources for financial data, for instance
## Yahoo Finance, AlphaVantage and Quandl. However, some of the
## free data sources have numerous issues with accuracy and
## handling of missing data, so only CSV importing is shown here.
##
## For csv data, one can use read.csv to read it
##
## Example:
## data = read.csv('Ch1aprices.csv', header=TRUE, sep=',')
## one can use the zoo() function from the package zoo
## to turn the data into a timeseries (see Listing 1.1/1.2)
```

Last updated June 2018

```
## There are many data sources for financial data, for instance
## Yahoo Finance, AlphaVantage and Quandl. However, some of the
## free data sources have numerous issues with accuracy and
## handling of missing data, so only CSV importing is shown here.
##
## For csv data, one can use numpy.loadtxt() to read it
##
## Example:
## import numpy as np
## data = np.loadtxt('data.csv', delimiter = ',', skiprows = 1)
## skiprows=1 ensures that the header row is skipped
```

Last updated June 2018

```
## Summary statistics of a small numeric sample
y=matrix(c(3.1,4.15,9)) # three values stored as a matrix (matrix() defaults to a single column)
sum(y) # sum of all elements of y
prod(y) # product of all elements of y
max(y) # maximum value of y
min(y) # minimum value of y
range(y) # min, max value of y, returned together as one vector
mean(y) # arithmetic mean
median(y) # median
var(y) # variance
cov(y) # covar matrix = variance for single vector
cor(y) # corr matrix = [1] for single vector
sort(y) # sorting in ascending order
log(y) # natural log, applied to every element
```

Last updated June 2018

```
import numpy as np

# Summary statistics of a small sample.
y = [3.14, 15, 9.26, 5]
print(sum(y))  # sum of all elements of y
print(max(y))  # maximum value of y
print(min(y))  # minimum value of y
print(np.mean(y))  # arithmetic mean
print(np.median(y))  # median
# np.var() divides by n unless ddof is given; ddof=1 selects the sample
# variance (divisor n - 1), the same estimator np.cov() uses by default,
# so this line and the next one print the same number.
print(np.var(y, ddof=1))  # sample variance
print(np.cov(y))  # covar matrix = variance for single vector
print(np.corrcoef(y))  # corr matrix = [1] for single vector
print(np.sort(y))  # sort in ascending order
print(np.log(y))  # natural log
```

Last updated June 2018

```
library(moments) # package supplying skewness() and kurtosis()
## NOTE: y is not created in this listing; presumably it is carried over from an earlier listing
mean(y) # mean
var(y) # variance
sd(y) # unbiased standard deviation, by default
skewness(y) # skewness
kurtosis(y) # kurtosis (presumably Pearson's non-excess measure; confirm in the moments documentation)
```

Last updated June 2018

```
import numpy as np
from scipy import stats

# NOTE: y is not defined in this listing; presumably it is carried over
# from an earlier listing.
print(np.mean(y))  # mean
# ddof=1 gives the sample variance (divisor n - 1), consistent with the
# standard deviation on the next line; np.var() alone would divide by n.
print(np.var(y, ddof=1))  # sample variance
print(np.std(y, ddof=1))  # ddof = 1 for unbiased standard deviation
print(stats.skew(y))  # skewness
print(stats.kurtosis(y, fisher=False))  # fisher = False gives Pearson definition
```

Last updated June 2018

```
z = matrix(c(1,2,3,4),2,2) # z is a 2 x 2 matrix (values are filled column by column)
x = matrix(c(1,2),1,2) # x is a 1 x 2 matrix
## Note: z * x is undefined since the two matrices are not conformable
z %*% t(x) # matrix product of (2 x 2) and (2 x 1): this evaluates to a 2 x 1 matrix
rbind(z,x) # "stacking" z and x vertically
cbind(z,t(x)) # "stacking" z and x' horizontally
## Note: dimensions must match along the combining axis
```

Last updated June 2018

```
import numpy as np

# Plain 2-D arrays are used here instead of np.matrix, which the NumPy
# documentation no longer recommends; "@" is the matrix product operator.
z = np.array([[1, 2], [3, 4]])  # z is a 2 x 2 matrix
x = np.array([[1, 2]])  # x is a 1 x 2 matrix
## Note: z @ x is undefined since the two matrices are not conformable
print(z @ x.T)  # this evaluates to a 2 x 1 matrix
b = np.concatenate((z, x), axis=0)  # "stacking" z and x vertically
print(b)
c = np.concatenate((z, x.T), axis=1)  # "stacking" z and x' horizontally
print(c)
## note: dimensions must match along the combining axis
```

Last updated June 2018

```
q = seq(from = -3, to = 3, length = 7) # specify a set of values: -3, -2, ..., 3
p = seq(from = 0.1, to = 0.9, length = 9) # specify a set of probabilities: 0.1, 0.2, ..., 0.9
qnorm(p, mean = 0, sd = 1) # element-wise inverse Normal quantile
pt(q, df = 4) # element-wise cdf under Student-t(4)
dchisq(q, df = 2) # element-wise pdf under Chisq(2)
## Similar syntax for other distributions
## q for quantile, p for cdf, d for pdf, r for random sampling
## followed by the abbreviation of the distribution
## One can also obtain pseudorandom samples from distributions
x = rt(100, df = 5) # Sampling 100 times from TDist with 5 df
y = rnorm(50, mean = 0, sd = 1) # Sampling 50 times from a standard normal
## Given data, we obtain MLE estimates of distribution parameters with package MASS:
library(MASS)
res = fitdistr(x, densfun = "normal") # Fitting x to normal dist
print(res)
```

Last updated June 2018

```
import numpy as np
from scipy import stats

# Evaluation grids: the integers -3..3 and the probabilities 0.1..0.9.
q = np.arange(-3, 4)
p = np.arange(0.1, 1.0, 0.1)

print(stats.norm.ppf(p))  # standard Normal quantile at each probability in p
print(stats.t.cdf(q, df=4))  # Student-t(4) cdf at each point in q
print(stats.chi2.pdf(q, df=2))  # Chisq(2) pdf at each point in q

# Pseudorandom draws come from numpy.random.
x = np.random.standard_t(5, 100)  # 100 draws from a Student-t with 5 df
y = np.random.normal(size=50)  # 50 draws from a standard Normal

# Maximum-likelihood fit of a Normal distribution to the sample x.
res = stats.norm.fit(x)
print(res)
```

Last updated June 2018

```
library(tseries) # package supplying jarque.bera.test()
x = rt(500, df = 5) # Create hypothetical dataset x: 500 draws from Student-t(5)
jarque.bera.test(x) # Jarque-Bera test for normality
Box.test(x, lag = 20, type = c("Ljung-Box")) # Ljung-Box test for serial correlation, using 20 lags
```

Last updated June 2018

```
import numpy as np  # needed for np.random below; imported here so the listing runs on its own
from scipy import stats
from statsmodels.stats.diagnostic import acorr_ljungbox

x = np.random.standard_t(df=5, size=500)  # Create dataset x
print(stats.jarque_bera(x))  # Jarque-Bera test
print(acorr_ljungbox(x, lags=20))  # Ljung-Box test
```

Last updated June 2018

```
x = rt(60, df = 5) # Create hypothetical dataset x
par(mfrow=c(1,2), pty='s') # two plots side by side, each with a square plotting region
acf(x,20) # autocorrelation plot, maximum lag 20
pacf(x,20) # partial autocorrelation plot, maximum lag 20
```

Last updated June 2018

```
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt

# Hypothetical sample: 60 draws from a Student-t with 5 degrees of freedom.
y = np.random.standard_t(5, 60)

# Autocorrelations with nlags=20; the first entry of the result is left
# out of the bar chart, so the bars are numbered from 1.
q1 = sm.tsa.stattools.acf(y, nlags=20)
plt.bar(x=np.arange(1, len(q1)), height=q1[1:])
plt.show()
plt.close()

# The same chart for the partial autocorrelations.
q2 = sm.tsa.stattools.pacf(y, nlags=20)
plt.bar(x=np.arange(1, len(q2)), height=q2[1:])
plt.show()
plt.close()
```

Last updated June 2018

```
## For loops
for (i in 3:7) { # i takes the values 3, 4, 5, 6, 7 in turn
  print(i^2)
}
## If-else statements
X <- 10
if (X %% 3 == 0) {
  print("X is a multiple of 3")
} else {
  print("X is not a multiple of 3")
}
## Functions (example: a simple excess kurtosis function)
## x: numeric sample; excess: amount subtracted from the ratio (optional, default 3)
excess_kurtosis <- function(x, excess = 3) {
  centred <- x - mean(x)
  m4 <- mean(centred^4) # mean fourth power of the deviations from the mean
  return(m4 / sd(x)^4 - excess)
}
x <- rt(60, df = 5) # Create hypothetical dataset x
excess_kurtosis(x)
```

Last updated June 2018

```
import numpy as np

## For loops
for i in range(3, 8):  # NOTE: range(start, end), end excluded
    print(i**2)  # range(3, 8) iterates through 3, 4, 5, 6, 7

## If-else statements
X = 10
if X % 3 == 0:
    print("X is a multiple of 3")
else:
    print("X is not a multiple of 3")


## Functions (example: a simple excess kurtosis function)
def excess_kurtosis(x, excess=3):  # note: excess optional, default = 3
    """Return the kurtosis ratio of ``x`` minus ``excess``.

    Args:
        x: Numeric sample; a list, tuple or NumPy array.
        excess: Amount subtracted from the ratio (default 3).

    Returns:
        ``m4 / std**4 - excess``, where ``m4`` is the mean fourth power of
        the deviations from the mean and ``std`` is ``np.std(x)``.
    """
    x = np.asarray(x)  # lets plain Python sequences be passed in as well
    m4 = np.mean((x - np.mean(x)) ** 4)  # note: exponentiation in Python uses **
    return m4 / (np.std(x) ** 4) - excess


x = np.random.standard_t(df=5, size=60)  # Create hypothetical dataset x
print(excess_kurtosis(x))
```

Last updated June 2018

```
y = rnorm(50, mean = 0, sd = 1) # 50 draws from a standard normal
par(mfrow=c(2,2)) # sets up space for subplots (a 2 x 2 grid)
barplot(y) # bar plot
plot(y,type='l') # line plot
hist(y) # histogram
plot(y) # scatter plot of y against its index
```

Last updated June 2018

```
import numpy as np
import matplotlib.pyplot as plt

y = np.random.normal(size=50)
z = np.random.standard_t(df=4, size=50)
## using Matplotlib to plot bar, line, histogram and scatter plots
plt.subplot(2, 2, 1)  # first panel of a 2 x 2 grid
plt.bar(range(len(y)), y)
plt.subplot(2, 2, 2)
plt.plot(y)
plt.subplot(2, 2, 3)
plt.hist(y)
plt.subplot(2, 2, 4)
plt.scatter(y, z)
plt.show()  # display the figure; when run as a script nothing is shown without this call
plt.close()
```

Last updated June 2018

```
## as.integer() and its relatives convert an object to another type;
## typeof() reports the type an object currently has
x <- 8.0
print(typeof(x)) # "double"
x <- as.integer(x)
print(typeof(x)) # "integer"
```

Last updated June 2018

```
## int(), float() and friends convert an object to another type;
## type() reports the type an object currently has
x = 8.0
print(type(x))  # <class 'float'>
x = int(x)
print(type(x))  # <class 'int'>
```