Financial Analytics With Python
Financial Analytics With Python
with Python
Contents
Pandas
• The Pandas module is used for working with tabular data
• It allows us to work with data in table form, such as in CSV or SQL database formats. We can also
create tables of our own, and edit or add columns or rows to tables
• Pandas provides us with some powerful objects such as DataFrames (two-dimensional data
structures in which data is aligned in a tabular fashion in rows and columns) and
Series (one-dimensional labeled arrays capable of holding data of any type: integer, string, float,
Python objects, etc.), which are very useful for working with and analyzing data
• A DataFrame is a collection of Series that can be used to analyse the data.
Pandas, Numpy, Matplotlib - Basics
Numpy
• The Numpy module is mainly used for working with numerical data
• It provides us with a powerful object known as an Array
• An array is a data structure that stores values of the same data type
• With Arrays, we can perform mathematical operations on multiple values at the same time, and
also perform operations between different Arrays, similar to matrix operations
Pandas, Numpy, Matplotlib - Basics
Matplotlib
• The Matplotlib module is used for data visualization
• It provides functionality for us to draw charts and graphs, so that we can better understand and
present the data visually
• Pandas, Numpy and Matplotlib together allow us to analyse, manipulate and visualise data in very
useful ways
Data analysis in Python
from pylab import plt
plt.style.use('seaborn')
import matplotlib as mpl
mpl.rcParams['font.family'] = 'serif'
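Note:
Seaborn is a Python data visualization library based on Matplotlib. It provides a high-level interface
for drawing attractive and informative statistical graphics. The 'seaborn' style selected with
plt.style.use() is a style sheet shipped with Matplotlib that mimics Seaborn's look; in Matplotlib 3.6
and later this style sheet is named 'seaborn-v0_8'.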
PyLab is a module that belongs to the Python mathematics library Matplotlib. PyLab combines the
numerical module numpy with the graphical plotting module pyplot.
Data analysis in Python
import numpy as np
import pandas as pd
> df
> df
> df = pd.DataFrame(a)
df
> np.array(df).round(6)
Base of Analytics
> df.sum()
> df.mean()
> df.cumsum()
> df.describe()
> np.sqrt(abs(df))
> np.sqrt(abs(df)).sum()
> type(df)
> df['No1']
> type(df['No1'])
Groupby Operations
> df['Quarter'] = ['Q1', 'Q1', 'Q1', 'Q2', 'Q2', 'Q2', 'Q3', 'Q3', 'Q3']
df
> groups.mean()
> groups.max()
> groups.size()
> groups.size()
> groups.mean()
Financial Time Series Analysis - using Python
> data.tail()
> np.round(spx.tail())
> data.tail()
> rets.dropna(inplace=True)
> rets.corr()
Two-Dimensional Plotting
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
x = range(len(y))
plt.plot(x, y)
# tag: matplotlib_0
# title: Plot given x- and y-values
plt.plot(y)
# tag: matplotlib_1
# title: Plot given data as 1d-array
plt.plot(y.cumsum())
# tag: matplotlib_2
# title: Plot given a 1d-array with method attached
plt.plot(y.cumsum())
plt.grid(True) # adds a grid
plt.axis('tight') # adjusts the axis ranges
# tag: matplotlib_3_a
# title: Plot with grid and tight axes
Data Visualisation
plt.plot(y.cumsum())
plt.grid(True)
plt.xlim(-1, 20)
plt.ylim(np.min(y.cumsum()) - 1,
np.max(y.cumsum()) + 1)
# tag: matplotlib_3_b
# title: Plot with custom axes limits
plt.figure(figsize=(7, 4))
# the figsize parameter defines the
# size of the figure in (width, height)
plt.plot(y.cumsum(), 'b', lw=1.5)
plt.plot(y.cumsum(), 'ro')
plt.grid(True)
plt.axis('tight')
plt.xlabel('index')
plt.ylabel('value')
plt.title('A Simple Plot')
# tag: matplotlib_4
# title: Plot with typical labels
Data Visualisation
plt.figure(figsize=(7, 4))
plt.plot(y, lw=1.5)
# plots two lines
plt.plot(y, 'ro')
# plots two dotted lines
plt.grid(True)
plt.axis('tight')
plt.xlabel('index')
plt.ylabel('value')
plt.title('A Simple Plot')
# tag: matplotlib_5
# title: Plot with two data sets
Data Visualisation
plt.figure(figsize=(7, 4))
plt.plot(y[:, 0], lw=1.5, label='1st')
plt.plot(y[:, 1], lw=1.5, label='2nd')
plt.plot(y, 'ro')
plt.grid(True)
plt.legend(loc=0)
plt.axis('tight')
plt.xlabel('index')
plt.ylabel('value')
plt.title('A Simple Plot')
# tag: matplotlib_6
# title: Plot with labeled data sets
Data Visualisation
plt.figure(figsize=(7, 5))
plt.subplot(211)
plt.plot(y[:, 0], lw=1.5, label='1st')
plt.plot(y[:, 0], 'ro')
plt.grid(True)
plt.legend(loc=0)
plt.axis('tight')
plt.ylabel('value')
plt.title('A Simple Plot')
plt.subplot(212)
plt.plot(y[:, 1], 'g', lw=1.5, label='2nd')
plt.plot(y[:, 1], 'ro')
plt.grid(True)
plt.legend(loc=0)
plt.axis('tight')
plt.xlabel('index')
plt.ylabel('value')
# tag: matplotlib_9
# title: Plot with two sub-plots
Data Visualisation
plt.figure(figsize=(9, 4))
plt.subplot(121)
plt.plot(y[:, 0], lw=1.5, label='1st')
plt.plot(y[:, 0], 'ro')
plt.grid(True)
plt.legend(loc=0)
plt.axis('tight')
plt.xlabel('index')
plt.ylabel('value')
plt.title('1st Data Set')
plt.subplot(122)
plt.bar(np.arange(len(y)), y[:, 1], width=0.5,
color='g', label='2nd')
plt.grid(True)
plt.legend(loc=0)
plt.axis('tight')
plt.xlabel('index')
plt.title('2nd Data Set')
# tag: matplotlib_10
# title: Plot combining line/point sub-plot with bar sub-plot
# size: 80
Data Visualisation – Other Plot Styles
y = np.random.standard_normal((1000, 2))
plt.figure(figsize=(7, 5))
plt.plot(y[:, 0], y[:, 1], 'ro')
plt.grid(True)
plt.xlabel('1st')
plt.ylabel('2nd')
plt.title('Scatter Plot')
# tag: matplotlib_11_a
# title: Scatter plot via +plot+ function
plt.figure(figsize=(7, 5))
plt.scatter(y[:, 0], y[:, 1], marker='o')
plt.grid(True)
plt.xlabel('1st')
plt.ylabel('2nd')
plt.title('Scatter Plot')
# tag: matplotlib_11_b
# title: Scatter plot via +scatter+ function
Data Visualisation – Other Plot Styles
c = np.random.randint(0, 10, len(y))
plt.figure(figsize=(7, 5))
plt.scatter(y[:, 0], y[:, 1], c=c, marker='o')
plt.colorbar()
plt.grid(True)
plt.xlabel('1st')
plt.ylabel('2nd')
plt.title('Scatter Plot')
# tag: matplotlib_11_c
# title: Scatter plot with third dimension
plt.figure(figsize=(7, 4))
plt.hist(y, label=['1st', '2nd'], bins=25)
plt.grid(True)
plt.legend(loc=0)
plt.xlabel('value')
plt.ylabel('frequency')
plt.title('Histogram')
# tag: matplotlib_12_a
# title: Histogram for two data sets
Data Visualisation – Other Plot Styles
plt.figure(figsize=(7, 4))
plt.hist(y, label=['1st', '2nd'], color=['b', 'g'],
stacked=True, bins=20)
plt.grid(True)
plt.legend(loc=0)
plt.xlabel('value')
plt.ylabel('frequency')
plt.title('Histogram')
# tag: matplotlib_12_b
# title: Stacked histogram for two data sets
# Illustrate the integral value, i.e. the area under the function
# between lower and upper limit
Ix = np.linspace(a, b)
Iy = func(Ix)
verts = [(a, 0)] + list(zip(Ix, Iy)) + [(b, 0)]
poly = Polygon(verts, facecolor='0.7', edgecolor='0.5')
ax.add_patch(poly)
Data Visualisation – Other Plot Styles
plt.text(0.5 * (a + b), 1, r"$\int_a^b f(x)\mathrm{d}x$",
horizontalalignment='center', fontsize=20)
ax.set_xticks((a, b))
ax.set_xticklabels(('$a$', '$b$'))
ax.set_yticks([func(a), func(b)])
ax.set_yticklabels(('$f(a)$', '$f(b)$'))
plt.grid(True)
# tag: matplotlib_math
# title: Exponential function, integral area and Latex labels
# size: 60
Data Visualisation – Financial Plots
import pandas as pd
import cufflinks as cf  # install first: run "pip install cufflinks" in the Anaconda Prompt
iplot(qf.iplot(asFigure=True))
qf.add_bollinger_bands(periods=15, boll_std=2)
Data Visualisation – Financial Plots
iplot(qf.iplot(asFigure=True))
3d Plotting
strike[:2]
ax.set_xlabel('strike')
ax.set_ylabel('time-to-maturity')
ax.set_zlabel('implied volatility')
ax.set_xlabel('strike')
ax.set_ylabel('time-to-maturity')
ax.set_zlabel('implied volatility')
# tag: matplotlib_18
# title: 3d scatter plot for (fake) implied volatilities
# size: 70
Thank You