cp5_DataVisualization
May 31, 2019
[1]: import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
[2]: np.random.seed(1000)
y = np.random.standard_normal(20)
[3]: x = range(len(y))
plt.plot(x,y)
plt.show()
[4]: plt.plot(y)
plt.show()
1
[5]: plt.plot(y.cumsum())
[5]: [<matplotlib.lines.Line2D at 0x10c599b0>]

2
1 plt.axis()
[6]: plt.plot(y.cumsum())
plt.grid(True) #adds a grid
plt.axis('tight') # adjusts the axis ranges
plt.show()
[7]: plt.plot(y.cumsum())
plt.grid(True) #adds a grid
#adjusts the axis ranges
plt.show()

3


2 set the minimum and maximum values of each axis by using plt.xlim and plt.ylim
[8]: plt.plot( y.cumsum() )
plt.grid(True)
plt.xlim(-1,20)
plt.ylim(np.min(y.cumsum())-1,
np.max(y.cumsum())+1)
[8]: (-3.1915310617211072, 1.4342209788376488)
4
[11]: plt.figure(figsize=(7,4))
# the figsize parameter define the
# size of the figure in (width, height)
plt.plot(y.cumsum(), 'b', lw=1.5) #blue color for line, line width=1.5
plt.plot(y.cumsum(), 'go') #marker='o' marker's color='g'
plt.grid(True)
plt.axis('tight')
plt.xlabel('index')
plt.ylabel('value')
plt.title('A Simple Plot')
[11]: Text(0.5, 1.0, 'A Simple Plot')
5
3 first generate a NumPy ndarray of shape 20 × 2 filled with standard normally
distributed (pseudo)random numbers.
[12]: np.random.seed(2000)
y = np.random.standard_normal((20,2)).cumsum(axis=0)
6
[16]: plt.figure(figsize=(7,4))
plt.plot(y,lw=1.5)
#plots two lines
plt.plot(y, 'bo')
#plots two dotted lines
plt.grid(True)
plt.axis('tight')
plt.xlabel('index')
plt.ylabel('value')
plt.title('A Simple Plot')
[16]: Text(0.5, 1.0, 'A Simple Plot')

plt.legend
[18]: plt.figure(figsize=(7,4))
# np.random.seed(2000)
# y = np.random.standard_normal((20,2)).cumsum(axis=0)
plt.plot(y[:,0], lw=1.5, label='1st') ###########
plt.plot(y[:,1], lw=1.5, label='2nd') ###########
plt.plot(y,'ro')
plt.grid(True)
plt.legend(loc=0)###########
plt.axis('tight')
7
plt.xlabel('index')
plt.ylabel('value')
plt.title('A Simple Plot')
[18]: Text(0.5, 1.0, 'A Simple Plot')


two different yaxes
[1]: import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
[11]: np.random.seed(2000)
y = np.random.standard_normal((20,2)).cumsum(axis=0)
[12]: y[:, 0] = y[:, 0]*100
plt.figure(figsize=(7,4))
plt.plot(y[:,0], lw=1.5, label='1st')
plt.plot(y[:,1], lw=1.5, label='2nd')
plt.plot(y,'ro')
plt.grid(True)
plt.legend(loc=0)
plt.axis('tight')
plt.xlabel('index')
plt.ylabel('value')
plt.title('A Simple Plot')
plt.show()
[13]: # plot first data set using first (left) axis
# Draw two series with very different magnitudes in one figure by giving each
# its own y-axis. All calls go through the explicit axes handles (ax1 / ax2)
# instead of pyplot's implicit "current axes", so it is obvious which axis
# every label, legend and line belongs to.
fig, ax1 = plt.subplots()

# Left axis: first data set (blue line with red point markers).
ax1.plot(y[:, 0], 'b', lw=1.5, label='1st')
ax1.plot(y[:, 0], 'ro')
ax1.legend(loc=8)  # loc=8 -> 'lower center'
ax1.grid(True)
ax1.axis('tight')
ax1.set_xlabel('index')
ax1.set_ylabel('value 1st')  # set once; the original cell set it twice
ax1.set_title('A Simple Plot')

# Right axis: second data set, sharing the x-axis with ax1.
ax2 = ax1.twinx()
ax2.plot(y[:, 1], 'g', lw=1.5, label='2nd')
ax2.plot(y[:, 1], 'ro')
ax2.legend(loc=0)  # loc=0 -> 'best'
ax2.set_ylabel('value 2nd')
plt.show()

two separate subplots
[15]:
# Two vertically stacked panels (2 rows x 1 column), one per data set.
plt.figure(figsize=(7, 5))

# Upper panel: first data set as a blue line with red point markers.
ax_top = plt.subplot(2, 1, 1)
ax_top.plot(y[:, 0], 'b', lw=1.5, label='1st')
ax_top.plot(y[:, 0], 'ro')
ax_top.grid(True)
ax_top.legend(loc=0)
ax_top.axis('tight')
ax_top.set_ylabel('value')
ax_top.set_xlabel('index')
ax_top.set_title('A Simple Plot')

# Lower panel: second data set as a green line with red point markers.
ax_bottom = plt.subplot(2, 1, 2)
ax_bottom.plot(y[:, 1], 'g', lw=1.5, label='2nd')
ax_bottom.plot(y[:, 1], 'ro')
ax_bottom.grid(True)
ax_bottom.legend(loc=0)
ax_bottom.axis('tight')
ax_bottom.set_ylabel('value')

plt.show()

choose two different plot types
[17]: plt.figure(figsize=(9,4))
# Left panel (1 row x 2 columns, slot 1): first data set as a line with
# red point markers.
plt.subplot(121)
plt.plot(y[:, 0], lw=1.5, label='1st')
plt.plot(y[:, 0], 'ro')
plt.grid(True)
plt.legend(loc=0)  # one call is enough; the original repeated it after axis('tight')
plt.axis('tight')
plt.xlabel('index')
plt.ylabel('value')
plt.title('1st Data Set')

# Right panel (slot 2): second data set as a bar chart.
plt.subplot(122)
plt.bar(np.arange(len(y)), y[:, 1], width=0.5, color='g', label='2nd')
plt.grid(True)
plt.legend(loc=0)
plt.axis('tight')
plt.xlabel('index')
plt.title('2nd Data Set')
plt.show()

scatter plot
plt.plot(y[:,0], y[:,1], 'ro')
plt.scatter(y[:,0], y[:,1], marker='o')
[18]:
# Fresh two-dimensional sample: 1,000 rows, one column per data set.
y = np.random.standard_normal(size=(1000, 2))

plt.figure(figsize=(7, 5))
# Scatter plot built with plt.plot: column 0 against column 1, drawn with the
# 'ro' format string (red circle markers).
plt.plot(y[:, 0], y[:, 1], 'ro')
plt.gca().set(xlabel='1st', ylabel='2nd', title='Scatter Plot')
plt.grid(True)
plt.show()
[19]:
# Same picture as the plt.plot variant, this time produced by the dedicated
# plt.scatter function.
plt.figure(figsize=(7, 5))
plt.scatter(x=y[:, 0], y=y[:, 1], marker='o')
plt.gca().set(xlabel='1st', ylabel='2nd', title='Scatter Plot')
plt.grid(True)
plt.show()

plt.colorbar()
[21]: c = np.random.randint(0, 10, len(y))
[24]: plt.figure( figsize=(7,5) )
#y = np.random.standard_normal((1000,2)) #two-dimensional data set
plt.scatter(y[:,0], y[:,1], c=c, marker='o')
plt.colorbar()##############
plt.grid(True)
plt.xlabel('1st')
plt.ylabel('2nd')
plt.title('Scatter Plot')
plt.show()

plt.hist(y, label=['1st', '2nd'], bins=25)
puts the frequency values of the two data sets next to each other in the same plot
[27]:
# Histogram of both columns of y: passing the 2-D array gives one bar series
# per column, labelled via `label`, using 25 bins.
plt.figure(figsize=(7, 4))
plt.hist(y, bins=25, label=['1st', '2nd'])
plt.legend(loc=0)
plt.grid(True)
plt.gca().set(xlabel='value', ylabel='frequency', title='Histogram')
plt.show()

plt.hist(x, bins=10, range=None, normed=False, weights=None, cumulative=False, bottom=None,
histtype='bar', align='mid', orientation='vertical', rwidth=None, log=False,
color=None, label=None, stacked=False, hold=None, **kwargs)
stacked histogram
the data of the two data sets is stacked in the histogram
[6]:
import matplotlib.pyplot as plt
import numpy as np
[7]:
# Fix the state of NumPy's global random generator so the draw below is
# reproducible.
np.random.seed(2000)
# Two-dimensional data set: 1,000 standard-normal draws in each of 2 columns.
y = np.random.standard_normal(size=(1000, 2))
[12]:
# Stacked histogram: the counts of the second column are drawn on top of the
# first column's counts in each of the 20 bins.
plt.figure(figsize=(7, 4))
plt.hist(
    y,
    bins=20,
    stacked=True,
    rwidth=0.8,
    color=['b', 'g'],
    label=['1st', '2nd'],
)
plt.legend(loc='upper right')
plt.grid(True)
plt.gca().set(xlabel='value', ylabel='frequency', title='Histogram')
plt.show()

boxplot
[13]:
# Boxplot of the columns of y, with the x tick labels renamed through plt.setp.
fig, ax=plt.subplots(figsize=(7,4))
plt.boxplot(y)  # draws into the axes created on the line above
plt.grid(True)
# plt.setp sets properties for a (set of) plotting instance(s); here it
# replaces the x tick labels of the axes object `ax`.
plt.setp(ax, xticklabels=['1st', '2nd'])
# plt.setp can be applied to other plotting instances as well, e.g. to lines:
#   line = plt.plot(data, 'r')
#   plt.setp(line, linestyle='--')
# (xticklabels is a property of the axes, not of a line, so it is set on `ax`.)
plt.xlabel('data set')
plt.ylabel('value')
plt.title('Boxplot')
plt.show()

from matplotlib.patches import Polygon
[16]:
from matplotlib.patches import Polygon
import numpy as np
[17]:
def func(x):
    """Return 0.5 * exp(x) + 1.

    The computation goes through ``np.exp``, so ``x`` may be a scalar or an
    array-like; array input is evaluated element-wise.
    """
    return 0.5 * np.exp(x) + 1
[21]:
x = np.linspace(0,2)
y = func(x)
[42]:
# Plot the function and shade the area under it between a and b.
fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(x, y, 'blue', linewidth=2)
ax.set_ylim(bottom=0)  # pin the lower y limit at zero

a, b = 0.5, 1.5  # integral limits

# Outline of the shaded region: start on the x-axis at a, follow the curve
# from a to b, then drop back to the x-axis at b.
Ix = np.linspace(a, b)
Iy = func(Ix)
verts = [(a, 0), *zip(Ix, Iy), (b, 0)]
poly = Polygon(verts, facecolor='0.7', edgecolor='.5')
ax.add_patch(poly)

# Text between dollar signs is rendered as LaTeX. For ax.text the first two
# arguments are data coordinates; for fig.text they are figure coordinates.
ax.text(0.5 * (a + b), 1, r"$f(x)\mathrm{d}x$",
        horizontalalignment='center', fontsize=20)
fig.text(0.9, 0.075, '$x$')
fig.text(0.075, 0.9, '$f(x)$')

# Only the integral limits and their function values get ticks.
ax.set_xticks([a, b])
ax.set_xticklabels(['$a$', '$b$'])
ax.set_yticks([func(a), func(b)])
ax.set_yticklabels(['$f(a)$', '$f(b)$'])

ax.grid(True)
plt.show()
[43]:
# Plot the function, shade the area under it between a and b, and annotate
# the shaded region with the integral expression.
fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(x, y, 'blue', linewidth=2)
ax.set_ylim(bottom=0)  # pin the lower y limit at zero

a, b = 0.5, 1.5  # integral limits

# Outline of the shaded region: start on the x-axis at a, follow the curve
# from a to b, then drop back to the x-axis at b.
Ix = np.linspace(a, b)
Iy = func(Ix)
verts = [(a, 0), *zip(Ix, Iy), (b, 0)]
poly = Polygon(verts, facecolor='0.7', edgecolor='.5')
ax.add_patch(poly)

# Text between dollar signs is rendered as LaTeX. For ax.text the first two
# arguments are data coordinates; for fig.text they are figure coordinates.
ax.text(0.5 * (a + b), 1, r"$\int_a^b f(x)\mathrm{d}x$",
        horizontalalignment='center', fontsize=20)
fig.text(0.9, 0.075, '$x$')
fig.text(0.075, 0.9, '$f(x)$')

# Only the integral limits and their function values get ticks.
ax.set_xticks([a, b])
ax.set_xticklabels(['$a$', '$b$'])
ax.set_yticks([func(a), func(b)])
ax.set_yticklabels(['$f(a)$', '$f(b)$'])

ax.grid(True)
plt.show()

Financial Plots
[17]:
#pip install plotly
import plotly
[18]:
#pip install https://github.com/santosjorge/cufflinks/archive/master.zip
import cufflinks as cf
[21]:
import pandas as pd
import cufflinks as cf
#Imports the offline plotting capabilities of plotly.
import plotly.offline as plyo
#Turns on the notebook plotting mode.
plyo.init_notebook_mode(connected=True)
import numpy as np
[23]:
# Five columns of cumulated standard-normal draws, 250 rows each.
a = np.random.standard_normal((250, 5)).cumsum(axis=0)

# DatetimeIndex with one entry per row of `a`.
index = pd.date_range(
    start='2019-1-1',  # the start date
    periods=len(a),    # the number of periods needed
    freq='B',          # business-daily frequency
)
[29]: a
array([[ 0.33666765, 1.68384697, 0.24207071, -0.97795537,
-1.47938526],
[ -0.26859689, 2.6626176 , -0.16828017, -1.18032388,
-0.87924579],
[ -0.94184763, 3.48609362, 0.86696027, -1.17238191,
-0.89632858],
...,
[ 2.94885525, 3.70059969, -7.1249625 , -2.31064923,
4.57395464],
[ -0.14697708, 4.13456458, -10.08114105, -1.68456493,
3.34544164],
[ 0.63524304, 4.6907317 , -9.98582813, 0.12757017,
2.44119391]])
[24]: df = pd.DataFrame(100 + 5*a, #A linear transform of the raw data
columns=list('abcde'), #The column headers as single,characters
index=index) #The DatetimeIndex object.
[25]: df.head() #The first five rows of data.
Cufflinks adds a new method to the DataFrame class: df.iplot(). This method uses plotly in the
backend to create interactive plots. The code examples in this section all make use of the option
to download the interactive plot as a static bitmap, which in turn is embedded in the text. In the
Jupyter Notebook environment, the created plots are all interactive. The result of the following
code is shown
[30]:
# Line plot of all columns of df, rendered in the notebook via plotly offline.
plyo.iplot( #This makes use of the offline (notebook mode) capabilities of plotly.
df.iplot(asFigure=True), #The df.iplot() method is called with parameter asFigure=True to allow for local plotting and
# embedding.
# image='png', #The image option provides in addition a static bitmap version of the plot.
filename='ply_01' #The filename for the bitmap to be saved is specified (the file type extension is added automatically).
)
[34]:
plyo.iplot(
df[['a','b']].iplot(asFigure=True,
theme='polar', #Selects a theme (plotting style) for the plot.
title='A Time Series Plot',
xTitle='date',
yTitle='value',
mode={'a':'markers', 'b':'lines+markers'}, #Defines the plotting mode (line, marker, etc.) by column.
symbol={'a':'circle', 'b':'diamond'}, #Defines the symbols to be used as markers by column.
size=5,
colors={'a':'blue', 'b':'red'},
),
# image='png',
filename='ply_02'
)
[36]:
# Histograms of the columns of df, one subplot per column.
plyo.iplot(
df.iplot(kind='hist', #Specifies the plotting type.
subplots=True, #Requires separate subplots for every column.
bins=15, #Sets the bins parameter (buckets to be used = bars to be plotted).
asFigure=True), #The df.iplot() method is called with parameter asFigure=True to allow for local plotting and embedding.
# image='png',
filename='ply_03'
)
[37]:
raw = pd.read_csv("../source/fxcm_eur_usd_eod_data.csv", index_col=0,parse_dates=True)
raw.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1547 entries, 2013-01-01 22:00:00 to 2017-12-31 22:00:00
Data columns (total 8 columns):
BidOpen 1547 non-null float64
BidHigh 1547 non-null float64
BidLow 1547 non-null float64
BidClose 1547 non-null float64
AskOpen 1547 non-null float64
AskHigh 1547 non-null float64
AskLow 1547 non-null float64
AskClose 1547 non-null float64
dtypes: float64(8)
memory usage: 108.8 KB
[38]:
# Keep only the four ask-side columns (Open-High-Low-Close, or OHLC) and
# restrict the frame to its final 60 rows, so that only a few data rows are
# used for the visualization.
quotes = raw[['AskOpen', 'AskHigh', 'AskLow', 'AskClose']].iloc[-60:]
quotes.tail()
[38]: AskOpen AskHigh AskLow AskClose
2017-12-25 22:00:00 1.18667 1.18791 1.18467 1.18587
2017-12-26 22:00:00 1.18587 1.19104 1.18552 1.18885
2017-12-27 22:00:00 1.18885 1.19592 1.18885 1.19426
2017-12-28 22:00:00 1.19426 1.20256 1.19369 1.20092
2017-12-31 22:00:00 1.20092 1.20144 1.19994 1.20144
[63]:
# Build the QuantFig object that the following cells plot and extend.
qf = cf.QuantFig(
quotes,#The DataFrame object is passed to the QuantFig constructor
title = 'EUR/USD Exchange Rate',
legend='bottom', #The legend is placed at the bottom of the plot.
name='EUR/USD' #This gives the data set a name.
)
[64]:
# Render the QuantFig object in the notebook via plotly offline.
plyo.iplot(
qf.iplot(asFigure=True),#The qf.iplot() method is called with parameter asFigure=True to allow for local plotting and embedding.
#image ='png',
filename='qf_01' #The filename for the bitmap to be saved is specified (the file type extension is added automatically).
)
[65]:
qf.add_bollinger_bands(periods=15,
boll_std=2) #The number of standard deviations to be used for the "band width"
[66]:
plyo.iplot(qf.iplot(asFigure=True),
#image = 'png',
filename='qf_02')
[67]:
qf.add_rsi(periods=14,
showbands=False)
[68]:
plyo.iplot(
qf.iplot(asFigure=True),
# image='png',
filename='qf_03'
)

Static 3D Plotting
[69]:
# One-dimensional axes of the surface: 24 points each.
strike = np.linspace(50, 150, 24)  # strike values between 50 and 150
ttm = np.linspace(0.5, 2.5, 24)  # times-to-maturity between 0.5 and 2.5 years

# np.meshgrid() generates the two-dimensional coordinate system out of the two
# one-dimensional ndarray objects; both names are rebound to 24 x 24 arrays.
strike, ttm = np.meshgrid(strike, ttm)

# Peek at the first two rows of the strike grid (identical rows: strike varies
# along the columns only).
strike[:2].round(1)
[69]:
array([[ 50. , 54.3, 58.7, 63. , 67.4, 71.7, 76.1, 80.4, 84.8,
89.1, 93.5, 97.8, 102.2, 106.5, 110.9, 115.2, 119.6, 123.9,
128.3, 132.6, 137. , 141.3, 145.7, 150. ],
[ 50. , 54.3, 58.7, 63. , 67.4, 71.7, 76.1, 80.4, 84.8,
89.1, 93.5, 97.8, 102.2, 106.5, 110.9, 115.2, 119.6, 123.9,
128.3, 132.6, 137. , 141.3, 145.7, 150. ]])
[70]:
iv = (strike-100)**2 / (100*strike) /ttm #The dummy implied volatility values.
iv[:5, :3]
[70]:
array([[1. , 0.76695652, 0.58132045],
[0.85185185, 0.65333333, 0.4951989 ],
[0.74193548, 0.56903226, 0.43130227],
[0.65714286, 0.504 , 0.38201058],
[0.58974359, 0.45230769, 0.34283001]])
[73]:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
[74]:
# 3D surface of the dummy implied volatilities over the strike / time-to-maturity grid.
fig = plt.figure(figsize=(10, 6))
# Sets up a canvas for 3D plotting. fig.gca(projection='3d') is no longer
# accepted by current Matplotlib (keyword arguments to gca() were deprecated
# in 3.4 and later removed), so the 3D axes is created with add_subplot.
ax = fig.add_subplot(111, projection='3d')
# Creates the 3D plot.
surf = ax.plot_surface(
    strike, ttm, iv,
    rstride=2,  # array row stride (step size)
    cstride=2,  # array column stride (step size)
    cmap=plt.cm.coolwarm,
    linewidth=0.5,
    antialiased=True,
)
ax.set_xlabel('strike')
ax.set_ylabel('time-to-maturity')
ax.set_zlabel('implied volatility')
fig.colorbar(surf, shrink=0.5, aspect=5);  # creates a color bar; ';' hides the repr
image.png
[75]:
fig = plt.figure(figsize=(10,6))
ax = fig.add_subplot(111, projection='3d')
ax.view_init(30,60)
ax.scatter(strike, ttm, iv, zdir='z', s=25,
c='b', marker='^')
ax.set_xlabel('strike')
ax.set_ylabel('time-to-maturity')
ax.set_zlabel('implied volatility')
[75]: Text(0.5, 0, 'implied volatility')