Visualization (Matplotlib)

Matplotlib

  • The most common low-level visualization library for Python.
  • It can create line graphs, scatter plots, density plots, histograms, heatmaps, and so on.
In [1]:
import numpy as np
import matplotlib.pyplot as plt

# Seed NumPy's global random generator once, up front, so that every random
# sample drawn later in the notebook is identical after "Restart & Run All".
SEED = 0
np.random.seed(SEED)

# Small fixed data set used by the first line plot and the first scatter plot.
a = np.array([2, 5, 7, 4, 7, 0, 3, 1, 9, 2])

Simple figure

In [2]:
# Same figure as the pyplot one-liners, but drawn on an explicit Axes object.
fig, ax = plt.subplots()
ax.plot(a)                        # plot the points in the array a
ax.set_title("My first figure")   # title shown above the axes
ax.set_xlabel("My x-axis")        # label for the x-axis
ax.set_ylabel("My y-axis")        # label for the y-axis
# Tell matplotlib to output the figure. Not strictly required in notebooks,
# but it keeps the cell output a bit neater.
plt.show()
In [3]:
# Two independent samples (x is drawn first, then y) of 10 values each
# from a normal distribution with mean 0 and standard deviation 1.
x, y = (np.random.normal(loc=0, scale=1, size=10) for _ in range(2))
# Each sample is sorted on its own, so the line joins the i-th smallest x
# with the i-th smallest y and therefore never decreases.
plt.plot(np.sort(x), np.sort(y))
Out[3]:
[<matplotlib.lines.Line2D at 0xbc13270>]
In [4]:
# Same sorted-x / sorted-y line as in the previous cell ...
plt.plot(np.sort(x), np.sort(y))
# ... this time with grid lines drawn behind the data.
plt.grid()

Scatter plot

In [5]:
# Format string 'ro': 'r' = red, 'o' = circle marker. No line style is given,
# so only the markers are drawn (a scatter plot of a against its indices).
plt.plot(a, 'ro')
Out[5]:
[<matplotlib.lines.Line2D at 0xbc4cff0>]
In [6]:
# Scatter plot of the unsorted pairs (x[i], y[i]) as red circles.
plt.plot(x, y, 'ro')
Out[6]:
[<matplotlib.lines.Line2D at 0xbc87630>]

Combination of line and scatter plot

In [7]:
# 'o-' draws circle markers joined by a solid line; the white marker face
# colour makes the circles look hollow.
plt.plot(
    np.sort(x),
    np.sort(y),
    'o-',
    markerfacecolor='w',
)
plt.grid()

Final plot

In [8]:
# Sort each sample once and reuse the result for the line and the text labels.
x_sorted, y_sorted = sorted(x), sorted(y)

plt.plot(x_sorted, y_sorted, 'o-', mfc='w')
plt.grid()
plt.xlabel("sorted x")
plt.ylabel("sorted y")
plt.title("My second figure")
# legend() expects a sequence of labels. A bare string would be iterated
# character by character, so the single line would be labelled just "S".
plt.legend(["Sorted data"], fontsize=10)
# Write each point's y-value just above its marker. The loop variables are
# deliberately not named `a`/`b`, so the array `a` defined at the top of the
# notebook is not overwritten and earlier cells can still be re-run.
for x_pt, y_pt in zip(x_sorted, y_sorted):
    plt.text(x_pt, y_pt + 0.1, '%.1f' % y_pt, ha='center', va='bottom', fontsize=9)
# Label 'A' placed at (0, 0.05) with an arrow pointing to the data point (-0.3, 0.1).
plt.annotate('A', xy=(-0.3, 0.1), xytext=(0, 0.05), arrowprops=dict(facecolor="r"))
Out[8]:
Text(0,0.05,'A')

Multiple plots on the same figure

In [9]:
# Two independent random walks: the running total of 1000 standard-normal steps.
x = np.cumsum(np.random.standard_normal(1000))
y = np.cumsum(np.random.standard_normal(1000))
# Consecutive plot() calls in one cell draw onto the same axes.
plt.plot(x, 'r-')   # first walk: solid red line
plt.plot(y, 'g-')   # second walk: solid green line
Out[9]:
[<matplotlib.lines.Line2D at 0xcd273f0>]

Subfigures

In [10]:
# A 2x2 grid of axes; `ax` is a 2-D array indexed as ax[row, column].
fig, ax = plt.subplots(nrows=2, ncols=2)
print(ax.shape)

# One data series per panel, listed in row-major order:
# top left, top right, bottom left, bottom right.
series = [
    np.arange(6),
    np.arange(6, 0, -1),
    (-1)**np.arange(6),
    (-1)**np.arange(1, 7),
]
for panel, values in zip(ax.flat, series):
    panel.plot(values)
plt.show()
(2, 2)
In [11]:
## An alternative way: select each panel with plt.subplot(rows, cols, index)
panels = [
    np.arange(6),
    np.arange(6, 0, -1),
    (-1)**np.arange(6),
    (-1)**np.arange(1, 7),
]
# Note the 1-indexing of subplots: the first panel is number 1, not 0.
for position, values in enumerate(panels, start=1):
    plt.subplot(2, 2, position)
    plt.plot(values)
plt.show()
In [12]:
# Each entry pairs a (rows, cols, index) subplot position with the data to draw.
# The first two panels use a 2x2 grid (note the 1-indexing of subplots); the
# third uses a 2x1 grid, so it spans the full width of the bottom row.
layout = [
    ((2, 2, 1), np.arange(6)),
    ((2, 2, 2), np.arange(6, 0, -1)),
    ((2, 1, 2), (-1)**np.arange(6)),
]
for spec, values in layout:
    plt.subplot(*spec)
    plt.plot(values)
plt.show()

Some common plots

  • Boxplot: plt.boxplot()
  • Barplot: plt.bar(), plt.barh()
  • Pieplot: plt.pie()
  • Histogram: plt.hist()
  • ...

Boxplot

In [13]:
# Two samples of 1000 standard-normal values, generated through two different
# NumPy entry points.
x = np.random.standard_normal(1000)
y = np.random.normal(loc=0, scale=1, size=1000)
# Box plot of the first sample only.
plt.boxplot(x)
Out[13]:
{'whiskers': [<matplotlib.lines.Line2D at 0xcf93e10>,
  <matplotlib.lines.Line2D at 0xcf93d50>],
 'caps': [<matplotlib.lines.Line2D at 0xcf43350>,
  <matplotlib.lines.Line2D at 0xcf43150>],
 'boxes': [<matplotlib.lines.Line2D at 0xcf930f0>],
 'medians': [<matplotlib.lines.Line2D at 0xcf43490>],
 'fliers': [<matplotlib.lines.Line2D at 0xcf43890>],
 'means': []}
In [14]:
# Passing a list of samples draws one box per sample, side by side.
plt.boxplot([x,y])
Out[14]:
{'whiskers': [<matplotlib.lines.Line2D at 0xceb27f0>,
  <matplotlib.lines.Line2D at 0xceb22d0>,
  <matplotlib.lines.Line2D at 0xceaa050>,
  <matplotlib.lines.Line2D at 0xceaa3b0>],
 'caps': [<matplotlib.lines.Line2D at 0xceb2530>,
  <matplotlib.lines.Line2D at 0xceb23d0>,
  <matplotlib.lines.Line2D at 0xceaa970>,
  <matplotlib.lines.Line2D at 0xceaacd0>],
 'boxes': [<matplotlib.lines.Line2D at 0xceb24b0>,
  <matplotlib.lines.Line2D at 0xceaa2b0>],
 'medians': [<matplotlib.lines.Line2D at 0xceb2210>,
  <matplotlib.lines.Line2D at 0xcee6070>],
 'fliers': [<matplotlib.lines.Line2D at 0xceaafd0>,
  <matplotlib.lines.Line2D at 0xcee62d0>],
 'means': []}

Barplot

In [15]:
# Bar positions 0..5 and bar lengths 0, 10, ..., 50.
x = np.arange(6)
height = x * 10
# Left panel: vertical bars.
plt.subplot(1, 2, 1)
plt.bar(x=x, height=height)
# Right panel: the same data as horizontal bars (bar length is `width` here).
plt.subplot(1, 2, 2)
plt.barh(y=x, width=height)
Out[15]:
<BarContainer object of 6 artists>
In [16]:
# Grouped bar chart: two series drawn side by side at each x position.
x = np.arange(6)
height1 = np.arange(6)*5 + 1
# NOTE(review): the single positional argument of np.random.normal is `loc`
# (the mean), not `size`, so this adds ONE random scalar centred on 6 to every
# bar. If per-bar noise was intended, use np.random.normal(size=6) instead.
height2 = np.arange(6)*5 + np.random.normal(loc=6)
width = 0.4
plt.bar(x, height1, width=width)
# Shift the second series right by one bar width so the bars do not overlap.
plt.bar(x + width, height2, width=width)
# Print each bar's value on top of it. The loop variables are deliberately not
# named `a`/`b`, so the array `a` defined at the top of the notebook is not
# overwritten and earlier cells can still be re-run.
for x_pos, bar_height in zip(x, height1):
    plt.text(x_pos, bar_height, format(bar_height, '1.1f'), ha="center", va="bottom", fontsize=8)
for x_pos, bar_height in zip(x, height2):
    plt.text(x_pos + width, bar_height, format(bar_height, '1.1f'), ha="center", va="bottom", fontsize=8)
plt.legend(['A', 'B'])
plt.show()

Pieplot

In [17]:
# Wedge sizes; they sum to 100, so each value is also its percentage share.
x = [2, 5, 12, 70, 2, 9]
# autopct formats the percentage printed inside each wedge:
# '%1.0f' -> no decimals, '%%' -> a literal percent sign.
plt.pie(x, autopct='%1.0f%%')
Out[17]:
([<matplotlib.patches.Wedge at 0xcf45570>,
  <matplotlib.patches.Wedge at 0xcf45a10>,
  <matplotlib.patches.Wedge at 0xcf45ed0>,
  <matplotlib.patches.Wedge at 0xcf573d0>,
  <matplotlib.patches.Wedge at 0xcf578b0>,
  <matplotlib.patches.Wedge at 0xcf57dd0>],
 [Text(1.09783,0.0690696,''),
  Text(1.05632,0.30689,''),
  Text(0.753002,0.801865,''),
  Text(-1.06544,-0.273559,''),
  Text(0.889919,-0.646564,''),
  Text(1.05632,-0.30689,'')],
 [Text(0.598816,0.0376743,'2%'),
  Text(0.576176,0.167395,'5%'),
  Text(0.410728,0.437381,'12%'),
  Text(-0.58115,-0.149214,'70%'),
  Text(0.48541,-0.352671,'2%'),
  Text(0.576176,-0.167395,'9%')])
In [18]:
plt.pie(x, autopct='%1.0f%%')
# Use the same scale on both axes so the pie is drawn as a circle.
# The call returns the resulting axis limits (xmin, xmax, ymin, ymax).
plt.axis('equal')
Out[18]:
(-1.1151651292868274,
 1.1007221490136585,
 -1.1177914822809132,
 1.1184619011406745)
In [19]:
# One single-letter label per wedge, in the same order as the values in x.
labels = list('ABCDEF')
plt.pie(
    x,
    labels=labels,
    autopct='%1.0f%%',
)
plt.axis('equal')
Out[19]:
(-1.1151651292868274,
 1.1007221490136585,
 -1.1177914822809132,
 1.1184619011406745)
In [20]:
# explode holds one radial offset per wedge; only the fourth wedge ('D')
# is pulled out of the pie.
plt.pie(
    x,
    labels=labels,
    autopct='%1.0f%%',
    explode=(0, 0, 0, 0.1, 0, 0),
)
plt.axis('equal')
Out[20]:
(-1.2168663625744343,
 1.1055650648844968,
 -1.1426604659190218,
 1.0935929175025658)
In [21]:
# wedgeprops is forwarded to every wedge: a width below the full radius turns
# the pie into a ring, and 'k' outlines each wedge in black.
plt.pie(
    x,
    labels=labels,
    autopct='%1.0f%%',
    explode=(0, 0, 0, 0.1, 0, 0),
    wedgeprops={'width': 0.6, 'edgecolor': 'k'},
)
plt.axis('equal')
Out[21]:
(-1.2168663625744343,
 1.1055650648844968,
 -1.1426604659190218,
 1.0935929175025658)

Histogram

In [22]:
# 10,000 draws from a normal distribution with mean 0 and standard deviation 1.
x = np.random.normal(loc=0, scale=1, size=10000)
# With no `bins` argument the sample is split into 10 bins; the call returns
# the bin counts, the bin edges and the drawn bar patches.
plt.hist(x)
Out[22]:
(array([   5.,  100.,  526., 1643., 2727., 2709., 1642.,  549.,   94.,
           5.]),
 array([-3.81671808, -3.05492747, -2.29313687, -1.53134626, -0.76955566,
        -0.00776505,  0.75402555,  1.51581616,  2.27760677,  3.03939737,
         3.80118798]),
 <a list of 10 Patch objects>)
In [23]:
# Same sample with 100 bins instead of 10, for a finer-grained histogram.
plt.hist(x, bins=100)
Out[23]:
(array([  1.,   0.,   0.,   0.,   0.,   0.,   0.,   3.,   1.,   0.,   1.,
          0.,   6.,   5.,   7.,  14.,  11.,  11.,  18.,  27.,  28.,  37.,
         26.,  39.,  53.,  46.,  58.,  67.,  95.,  77., 102.,  94., 120.,
        128., 175., 197., 175., 202., 210., 240., 260., 236., 270., 260.,
        256., 258., 280., 313., 305., 289., 308., 295., 269., 279., 277.,
        267., 283., 234., 251., 246., 207., 211., 207., 187., 176., 167.,
        144., 131., 109., 103.,  66.,  92.,  75.,  67.,  62.,  52.,  47.,
         24.,  39.,  25.,  11.,  14.,  18.,  13.,   6.,   9.,   7.,   8.,
          2.,   6.,   1.,   0.,   0.,   1.,   1.,   1.,   0.,   0.,   0.,
          1.]),
 array([-3.81671808, -3.74053902, -3.66435996, -3.5881809 , -3.51200184,
        -3.43582278, -3.35964372, -3.28346466, -3.2072856 , -3.13110654,
        -3.05492747, -2.97874841, -2.90256935, -2.82639029, -2.75021123,
        -2.67403217, -2.59785311, -2.52167405, -2.44549499, -2.36931593,
        -2.29313687, -2.21695781, -2.14077875, -2.06459969, -1.98842063,
        -1.91224157, -1.83606251, -1.75988344, -1.68370438, -1.60752532,
        -1.53134626, -1.4551672 , -1.37898814, -1.30280908, -1.22663002,
        -1.15045096, -1.0742719 , -0.99809284, -0.92191378, -0.84573472,
        -0.76955566, -0.6933766 , -0.61719754, -0.54101848, -0.46483942,
        -0.38866035, -0.31248129, -0.23630223, -0.16012317, -0.08394411,
        -0.00776505,  0.06841401,  0.14459307,  0.22077213,  0.29695119,
         0.37313025,  0.44930931,  0.52548837,  0.60166743,  0.67784649,
         0.75402555,  0.83020461,  0.90638368,  0.98256274,  1.0587418 ,
         1.13492086,  1.21109992,  1.28727898,  1.36345804,  1.4396371 ,
         1.51581616,  1.59199522,  1.66817428,  1.74435334,  1.8205324 ,
         1.89671146,  1.97289052,  2.04906958,  2.12524864,  2.20142771,
         2.27760677,  2.35378583,  2.42996489,  2.50614395,  2.58232301,
         2.65850207,  2.73468113,  2.81086019,  2.88703925,  2.96321831,
         3.03939737,  3.11557643,  3.19175549,  3.26793455,  3.34411361,
         3.42029267,  3.49647173,  3.5726508 ,  3.64882986,  3.72500892,
         3.80118798]),
 <a list of 100 Patch objects>)
In [24]:
# cumulative=True makes each bar the running total of all counts up to that
# bin, so the last bar equals the sample size. color='r' draws the bars in red.
plt.hist(x, bins=100, color='r',  cumulative=True)
Out[24]:
(array([1.000e+00, 1.000e+00, 1.000e+00, 1.000e+00, 1.000e+00, 1.000e+00,
        1.000e+00, 4.000e+00, 5.000e+00, 5.000e+00, 6.000e+00, 6.000e+00,
        1.200e+01, 1.700e+01, 2.400e+01, 3.800e+01, 4.900e+01, 6.000e+01,
        7.800e+01, 1.050e+02, 1.330e+02, 1.700e+02, 1.960e+02, 2.350e+02,
        2.880e+02, 3.340e+02, 3.920e+02, 4.590e+02, 5.540e+02, 6.310e+02,
        7.330e+02, 8.270e+02, 9.470e+02, 1.075e+03, 1.250e+03, 1.447e+03,
        1.622e+03, 1.824e+03, 2.034e+03, 2.274e+03, 2.534e+03, 2.770e+03,
        3.040e+03, 3.300e+03, 3.556e+03, 3.814e+03, 4.094e+03, 4.407e+03,
        4.712e+03, 5.001e+03, 5.309e+03, 5.604e+03, 5.873e+03, 6.152e+03,
        6.429e+03, 6.696e+03, 6.979e+03, 7.213e+03, 7.464e+03, 7.710e+03,
        7.917e+03, 8.128e+03, 8.335e+03, 8.522e+03, 8.698e+03, 8.865e+03,
        9.009e+03, 9.140e+03, 9.249e+03, 9.352e+03, 9.418e+03, 9.510e+03,
        9.585e+03, 9.652e+03, 9.714e+03, 9.766e+03, 9.813e+03, 9.837e+03,
        9.876e+03, 9.901e+03, 9.912e+03, 9.926e+03, 9.944e+03, 9.957e+03,
        9.963e+03, 9.972e+03, 9.979e+03, 9.987e+03, 9.989e+03, 9.995e+03,
        9.996e+03, 9.996e+03, 9.996e+03, 9.997e+03, 9.998e+03, 9.999e+03,
        9.999e+03, 9.999e+03, 9.999e+03, 1.000e+04]),
 array([-3.81671808, -3.74053902, -3.66435996, -3.5881809 , -3.51200184,
        -3.43582278, -3.35964372, -3.28346466, -3.2072856 , -3.13110654,
        -3.05492747, -2.97874841, -2.90256935, -2.82639029, -2.75021123,
        -2.67403217, -2.59785311, -2.52167405, -2.44549499, -2.36931593,
        -2.29313687, -2.21695781, -2.14077875, -2.06459969, -1.98842063,
        -1.91224157, -1.83606251, -1.75988344, -1.68370438, -1.60752532,
        -1.53134626, -1.4551672 , -1.37898814, -1.30280908, -1.22663002,
        -1.15045096, -1.0742719 , -0.99809284, -0.92191378, -0.84573472,
        -0.76955566, -0.6933766 , -0.61719754, -0.54101848, -0.46483942,
        -0.38866035, -0.31248129, -0.23630223, -0.16012317, -0.08394411,
        -0.00776505,  0.06841401,  0.14459307,  0.22077213,  0.29695119,
         0.37313025,  0.44930931,  0.52548837,  0.60166743,  0.67784649,
         0.75402555,  0.83020461,  0.90638368,  0.98256274,  1.0587418 ,
         1.13492086,  1.21109992,  1.28727898,  1.36345804,  1.4396371 ,
         1.51581616,  1.59199522,  1.66817428,  1.74435334,  1.8205324 ,
         1.89671146,  1.97289052,  2.04906958,  2.12524864,  2.20142771,
         2.27760677,  2.35378583,  2.42996489,  2.50614395,  2.58232301,
         2.65850207,  2.73468113,  2.81086019,  2.88703925,  2.96321831,
         3.03939737,  3.11557643,  3.19175549,  3.26793455,  3.34411361,
         3.42029267,  3.49647173,  3.5726508 ,  3.64882986,  3.72500892,
         3.80118798]),
 <a list of 100 Patch objects>)

Other data visualization libraries

  • Seaborn is a higher-level plotting library that is built on top of matplotlib. It allows easy creation of more complicated plots. The figures it produces also look prettier than ones created by matplotlib with its default settings.
  • Plotly: A powerful tool for creating interactive plots.
In [25]:
# Echo the sample used for the histograms; NumPy abbreviates long arrays with "...".
x
Out[25]:
array([-1.67410354,  0.24189083, -0.38457066, ..., -0.29009073,
       -0.22212898,  0.48443177])