Visualization (Matplotlib)

Matplotlib

  • The most common low-level visualization library for Python.
  • It can create line graphs, scatter plots, density plots, histograms, heatmaps, and so on.
In [2]:
import numpy as np
import matplotlib.pyplot as plt

a=np.array([2, 5, 7, 4, 7, 0, 3, 1, 9, 2])

Simple figure

In [3]:
plt.plot(a)                   # plot the points in the array a
plt.title("My first figure")  # Add a title to the figure
plt.xlabel("My x-axis")       # Give a label to the x-axis
plt.ylabel("My y-axis")       # Give a label to the y-axis
plt.show()                    # Tell matplotlib to output the figure.
                              # Not strictly required in notebooks (but a bit neater).
In [32]:
x = np.random.normal(loc=0, scale=1, size=10)
y = np.random.normal(loc=0, scale=1, size=10)
plt.plot(sorted(x), sorted(y))
Out[32]:
[<matplotlib.lines.Line2D at 0xa235e90>]
In [35]:
plt.plot(sorted(x), sorted(y))
plt.grid()

Scatter plot

In [34]:
plt.plot(a, 'ro')
In [11]:
plt.plot(x, y, 'ro')
Out[11]:
[<matplotlib.lines.Line2D at 0xa024070>]

Combination of line and scatter plot

In [38]:
plt.plot(sorted(x), sorted(y), 'o-', mfc='w')
plt.grid()

Final plot

In [65]:
plt.plot(sorted(x), sorted(y), 'o-', mfc='w')
plt.grid()
plt.xlabel("sorted x") 
plt.ylabel("sorted y") 
plt.title("My second figure")
plt.legend("Sorted data", fontsize=10)
for a, b in zip(sorted(x), sorted(y)):
    plt.text(a, b+0.1, '%.1f'%b, ha='center', va='bottom', fontsize=9)
plt.annotate('A', xy=(-0.3, 0.1), xytext=(0, 0.05),arrowprops=dict(facecolor="r"))
Out[65]:
Text(0,0.05,'A')

Multiple plot on the same figure

In [73]:
x = np.random.randn(1000).cumsum()
y = np.random.randn(1000).cumsum()
plt.plot(x, 'r-')
plt.plot(y, 'g-')
Out[73]:
[<matplotlib.lines.Line2D at 0xa6fc550>]

subfigures

In [4]:
fig, ax = plt.subplots(2,2)
print(ax.shape)
ax[0,0].plot(np.arange(6))          # top left
ax[0,1].plot(np.arange(6,0,-1))     # top right
ax[1,0].plot((-1)**np.arange(6))    # bottom left
ax[1,1].plot((-1)**np.arange(1,7))  # bottom right
plt.show()
(2, 2)
In [5]:
## An alternative way
plt.subplot(2, 2, 1)    # Note the 1-indexing of subplots.
plt.plot(np.arange(6))
plt.subplot(2, 2, 2)
plt.plot(np.arange(6, 0, -1))
plt.subplot(2, 2, 3)
plt.plot((-1)**np.arange(6))
plt.subplot(2, 2, 4)
plt.plot((-1)**np.arange(1, 7))
plt.show()
In [136]:
plt.subplot(2, 2, 1)    # Note the 1-indexing of subplots.
plt.plot(np.arange(6))
plt.subplot(2, 2, 2)
plt.plot(np.arange(6, 0, -1))
plt.subplot(2, 1, 2)
plt.plot((-1)**np.arange(6))
plt.show()

Some common plots

  • Boxplot: plt.boxplot()
  • Barplot: plt.bar(), plt.barh()
  • Pieplot: plt.pie()
  • Histogram:plt.hist
  • ...

Boxplot

In [84]:
x = np.random.randn(1000)
y = np.random.normal(0,1,1000)
plt.boxplot(x)
Out[84]:
{'whiskers': [<matplotlib.lines.Line2D at 0xb969df0>,
  <matplotlib.lines.Line2D at 0xb969ed0>],
 'caps': [<matplotlib.lines.Line2D at 0xb9710f0>,
  <matplotlib.lines.Line2D at 0xb971370>],
 'boxes': [<matplotlib.lines.Line2D at 0xb969b10>],
 'medians': [<matplotlib.lines.Line2D at 0xb971830>],
 'fliers': [<matplotlib.lines.Line2D at 0xb971890>],
 'means': []}
In [85]:
plt.boxplot([x,y])
Out[85]:
{'whiskers': [<matplotlib.lines.Line2D at 0xb9a5db0>,
  <matplotlib.lines.Line2D at 0xb9a5e90>,
  <matplotlib.lines.Line2D at 0xb9aad50>,
  <matplotlib.lines.Line2D at 0xb9aa5b0>],
 'caps': [<matplotlib.lines.Line2D at 0xb9aa0b0>,
  <matplotlib.lines.Line2D at 0xb9aa330>,
  <matplotlib.lines.Line2D at 0xb9b6250>,
  <matplotlib.lines.Line2D at 0xb9b64d0>],
 'boxes': [<matplotlib.lines.Line2D at 0xb9a5ad0>,
  <matplotlib.lines.Line2D at 0xb9aaad0>],
 'medians': [<matplotlib.lines.Line2D at 0xb9aa7f0>,
  <matplotlib.lines.Line2D at 0xb9b6750>],
 'fliers': [<matplotlib.lines.Line2D at 0xb9aa850>,
  <matplotlib.lines.Line2D at 0xb9b69d0>],
 'means': []}

Barplot

In [86]:
x = np.arange(6) 
height = np.arange(6)*10
plt.subplot(1, 2, 1)
plt.bar(x, height)
plt.subplot(1, 2, 2)
plt.barh(x, height)
Out[86]:
<BarContainer object of 6 artists>
In [109]:
x = np.arange(6) 
height1 = np.arange(6)*5 +1
height2 = np.arange(6)*5 + np.random.normal(6)
width=0.4
plt.bar(x, height1, width = width)
plt.bar(x+width, height2, width = width)
for a, b in zip(x, height1):
    plt.text(a, b, format(b, '1.1f'), ha="center", va="bottom", fontsize=8)
for a, b in zip(x, height2):
    plt.text(a+width, b, format(b, '1.1f'),ha="center", va="bottom", fontsize=8)
plt.legend(['A', 'B'])
plt.show()

Pieplot

In [114]:
x = [2, 5, 12, 70, 2, 9]
plt.pie(x, autopct='%1.0f%%')
Out[114]:
([<matplotlib.patches.Wedge at 0xbe3a5b0>,
  <matplotlib.patches.Wedge at 0xbe3aa90>,
  <matplotlib.patches.Wedge at 0xbe3af50>,
  <matplotlib.patches.Wedge at 0xbe45450>,
  <matplotlib.patches.Wedge at 0xbe45930>,
  <matplotlib.patches.Wedge at 0xbe45e50>],
 [Text(1.09783,0.0690696,''),
  Text(1.05632,0.30689,''),
  Text(0.753002,0.801865,''),
  Text(-1.06544,-0.273559,''),
  Text(0.889919,-0.646564,''),
  Text(1.05632,-0.30689,'')],
 [Text(0.598816,0.0376743,'2%'),
  Text(0.576176,0.167395,'5%'),
  Text(0.410728,0.437381,'12%'),
  Text(-0.58115,-0.149214,'70%'),
  Text(0.48541,-0.352671,'2%'),
  Text(0.576176,-0.167395,'9%')])
In [115]:
plt.pie(x, autopct='%1.0f%%')
plt.axis('equal')
Out[115]:
(-1.1151651292868274,
 1.1007221490136585,
 -1.1177914822809132,
 1.1184619011406745)
In [116]:
labels=['A','B','C','D','E', 'F']
plt.pie(x, autopct='%1.0f%%', labels=labels)
plt.axis('equal')
Out[116]:
(-1.1151651292868274,
 1.1007221490136585,
 -1.1177914822809132,
 1.1184619011406745)
In [122]:
plt.pie(x, autopct='%1.0f%%', labels=labels, explode=(0,0,0,0.1,0, 0))
plt.axis('equal')
Out[122]:
(-1.2168663625744343,
 1.1055650648844968,
 -1.1426604659190218,
 1.0935929175025658)
In [126]:
plt.pie(x, autopct='%1.0f%%', labels=labels, explode=(0,0,0,0.1,0, 0),wedgeprops={'width':0.6, 'edgecolor': 'k'})
plt.axis('equal')
Out[126]:
(-1.2168663625744343,
 1.1055650648844968,
 -1.1426604659190218,
 1.0935929175025658)

Histogram

In [129]:
x = np.random.normal(0,1,10000)
plt.hist(x)
Out[129]:
(array([  17.,  129.,  578., 1615., 2614., 2691., 1669.,  540.,  134.,
          13.]),
 array([-3.62583471, -2.89676384, -2.16769296, -1.43862209, -0.70955121,
         0.01951966,  0.74859054,  1.47766141,  2.20673229,  2.93580316,
         3.66487404]),
 <a list of 10 Patch objects>)
In [132]:
plt.hist(x, bins=100)
Out[132]:
(array([  2.,   2.,   3.,   3.,   7.,  11.,  16.,  29.,  25.,  48.,  72.,
         79., 112., 134., 181., 237., 276., 325., 364., 413., 447., 499.,
        503., 603., 562., 605., 575., 563., 479., 469., 448., 378., 338.,
        264., 241., 144., 135., 127.,  76.,  58.,  55.,  30.,  20.,  18.,
         11.,   3.,   5.,   1.,   1.,   3.]),
 array([-3.62583471, -3.48002054, -3.33420636, -3.18839219, -3.04257801,
        -2.89676384, -2.75094966, -2.60513549, -2.45932131, -2.31350714,
        -2.16769296, -2.02187879, -1.87606461, -1.73025044, -1.58443626,
        -1.43862209, -1.29280791, -1.14699374, -1.00117956, -0.85536539,
        -0.70955121, -0.56373704, -0.41792286, -0.27210869, -0.12629451,
         0.01951966,  0.16533384,  0.31114801,  0.45696219,  0.60277636,
         0.74859054,  0.89440471,  1.04021889,  1.18603306,  1.33184724,
         1.47766141,  1.62347559,  1.76928976,  1.91510394,  2.06091811,
         2.20673229,  2.35254646,  2.49836064,  2.64417481,  2.78998899,
         2.93580316,  3.08161734,  3.22743151,  3.37324569,  3.51905986,
         3.66487404]),
 <a list of 50 Patch objects>)
In [135]:
plt.hist(x, bins=100, color='r',  cumulative=True)
Out[135]:
(array([   17.,   146.,   724.,  2339.,  4953.,  7644.,  9313.,  9853.,
         9987., 10000.]),
 array([-3.62583471, -2.89676384, -2.16769296, -1.43862209, -0.70955121,
         0.01951966,  0.74859054,  1.47766141,  2.20673229,  2.93580316,
         3.66487404]),
 <a list of 10 Patch objects>)

Other data visualization libraries

  • Seaborn is a higher-level plotting library that is build on top of matplotlib. It allows easy creation of more complicated plots. The figures it produces also look prettier than ones created by matplotlib with its default settings.
  • Plotly: A powerful tool for creating interactive plots.
In [91]:
x
Out[91]:
array([0, 1, 2, 3, 4, 5])
In [ ]: