Histograms in Matplotlib
Histograms are a type of bar plot that shows the frequency of data values within consecutive ranges, called bins. In this article, we will explore how to create histograms using Matplotlib, a popular data visualization library for Python.
Basic Histogram
import matplotlib.pyplot as plt
import numpy as np
# Draw 1000 samples from the standard normal distribution
samples = np.random.randn(1000)
# Count the samples in 30 equal-width bins and display the figure
bin_count = 30
plt.hist(samples, bins=bin_count)
plt.show()
Output:
![Histograms in Matplotlib](https://apidemos.geek-docs.com/matplotlib/2024/06/24/20240608163708-1.png)
Customizing Histogram
import matplotlib.pyplot as plt
import numpy as np
# Draw 1000 samples from the standard normal distribution
samples = np.random.randn(1000)
# Keep the appearance options together, then draw the histogram with them
hist_style = {'bins': 50, 'color': 'skyblue', 'alpha': 0.7}
plt.hist(samples, **hist_style)
# Title and axis labels for the figure
plt.title('Customized Histogram')
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.show()
Output:
![Histograms in Matplotlib](https://apidemos.geek-docs.com/matplotlib/2024/06/24/20240608163708-2.png)
Multiple Histograms
import matplotlib.pyplot as plt
import numpy as np
# Two independent sets of 1000 standard-normal samples
first_sample = np.random.randn(1000)
second_sample = np.random.randn(1000)
# Draw each set on the same axes; alpha=0.5 keeps the overlapping bars visible
for series_label, series in (('Data1', first_sample), ('Data2', second_sample)):
    plt.hist(series, bins=30, alpha=0.5, label=series_label)
# The legend picks up the label given to each hist call
plt.legend()
plt.show()
Output:
![Histograms in Matplotlib](https://apidemos.geek-docs.com/matplotlib/2024/06/24/20240608163708-3.png)
Stacked Histograms
import matplotlib.pyplot as plt
import numpy as np
# Two independent sets of 1000 standard-normal samples
first_sample = np.random.randn(1000)
second_sample = np.random.randn(1000)
# Passing both sets in one call with stacked=True piles the bars of the
# second set on top of the first, using a common set of 30 bins
datasets = [first_sample, second_sample]
plt.hist(datasets, bins=30, stacked=True)
plt.show()
Output:
![Histograms in Matplotlib](https://apidemos.geek-docs.com/matplotlib/2024/06/24/20240608163708-4.png)
Histogram with Different Bin Sizes
import matplotlib.pyplot as plt
import numpy as np
# Draw 1000 samples from the standard normal distribution
samples = np.random.randn(1000)
# A sequence passed as `bins` is read as bin edges, so this yields four bins
# of unequal width: [-3, -1), [-1, 0), [0, 1) and [1, 3]
bin_edges = [-3, -1, 0, 1, 3]
plt.hist(samples, bins=bin_edges)
plt.show()
Output:
![Histograms in Matplotlib](https://apidemos.geek-docs.com/matplotlib/2024/06/24/20240608163708-5.png)
Adding Grid to Histogram
import matplotlib.pyplot as plt
import numpy as np
# Draw 1000 samples from the standard normal distribution
samples = np.random.randn(1000)
# Histogram with 30 bins
bin_count = 30
plt.hist(samples, bins=bin_count)
# Switch on grid lines for the current axes
plt.grid(True)
plt.show()
Output:
![Histograms in Matplotlib](https://apidemos.geek-docs.com/matplotlib/2024/06/24/20240608163708-6.png)
Customizing Bar Width
import matplotlib.pyplot as plt
import numpy as np
# Draw 1000 samples from the standard normal distribution
samples = np.random.randn(1000)
# rwidth is the bar width as a fraction of the bin width; 0.9 leaves a small
# gap between neighbouring bars
relative_bar_width = 0.9
plt.hist(samples, bins=30, rwidth=relative_bar_width)
plt.show()
Output:
![Histograms in Matplotlib](https://apidemos.geek-docs.com/matplotlib/2024/06/24/20240608163708-7.png)
Log Scale Histogram
import matplotlib.pyplot as plt
import numpy as np
# Draw 1000 samples from the standard normal distribution
samples = np.random.randn(1000)
# Draw the histogram first, then switch the count (y) axis to a log scale
bin_count = 30
plt.hist(samples, bins=bin_count)
plt.yscale('log')
plt.show()
Output:
![Histograms in Matplotlib](https://apidemos.geek-docs.com/matplotlib/2024/06/24/20240608163708-8.png)
Grouped Histograms
import matplotlib.pyplot as plt
import numpy as np
# Generate three independent sets of 1000 standard-normal samples
data1 = np.random.randn(1000)
data2 = np.random.randn(1000)
data3 = np.random.randn(1000)
# Plot grouped histograms.
# Passing the datasets as a list in a single call makes hist use one common
# set of bins and place the bars of the three datasets side by side in each
# bin. Three separate hist calls would only overlay the histograms on top of
# each other, each with its own bin edges.
plt.hist(
    [data1, data2, data3],
    bins=30,
    alpha=0.7,
    label=['Data1', 'Data2', 'Data3'],
)
plt.legend()
plt.show()
Output:
![Histograms in Matplotlib](https://apidemos.geek-docs.com/matplotlib/2024/06/24/20240608163708-9.png)
Cumulative Histogram
import matplotlib.pyplot as plt
import numpy as np
# Draw 1000 samples from the standard normal distribution
samples = np.random.randn(1000)
# With cumulative=True each bar shows the count of its own bin plus the
# counts of all bins for smaller values
hist_options = {'bins': 30, 'cumulative': True}
plt.hist(samples, **hist_options)
plt.show()
Output:
![Histograms in Matplotlib](https://apidemos.geek-docs.com/matplotlib/2024/06/24/20240608163708-10.png)
Histogram with Density
import matplotlib.pyplot as plt
import numpy as np
# Draw 1000 samples from the standard normal distribution
samples = np.random.randn(1000)
# density=True rescales the bars so that the total area under the histogram
# is 1, turning counts into a probability density
hist_options = {'bins': 30, 'density': True}
plt.hist(samples, **hist_options)
plt.show()
Output:
![Histograms in Matplotlib](https://apidemos.geek-docs.com/matplotlib/2024/06/24/20240608163708-11.png)
Plotting Horizontal Histogram
import matplotlib.pyplot as plt
import numpy as np
# Draw 1000 samples from the standard normal distribution
samples = np.random.randn(1000)
# orientation='horizontal' draws the bars sideways: values run along the
# y-axis and counts along the x-axis
bar_direction = 'horizontal'
plt.hist(samples, bins=30, orientation=bar_direction)
plt.show()
Output:
![Histograms in Matplotlib](https://apidemos.geek-docs.com/matplotlib/2024/06/24/20240608163708-12.png)
Histogram with Outliers
import matplotlib.pyplot as plt
import numpy as np
# Draw 1000 standard-normal samples, then overwrite the first one with an
# extreme value so the data contains a single outlier
samples = np.random.randn(1000)
outlier_value = 10
samples[0] = outlier_value
# The 30 bins span the full data range, so the outlier stretches the x-axis
bin_count = 30
plt.hist(samples, bins=bin_count)
plt.show()
Output:
![Histograms in Matplotlib](https://apidemos.geek-docs.com/matplotlib/2024/06/24/20240608163708-13.png)
Histogram with Customized Color
import matplotlib.pyplot as plt
import numpy as np
# Draw 1000 samples from the standard normal distribution
samples = np.random.randn(1000)
# Fill the bars with a named Matplotlib colour
bar_color = 'orange'
plt.hist(samples, bins=30, color=bar_color)
plt.show()
Output:
![Histograms in Matplotlib](https://apidemos.geek-docs.com/matplotlib/2024/06/24/20240608163708-14.png)
Histogram with Error Bars
import matplotlib.pyplot as plt
import numpy as np
# Draw 1000 samples from the standard normal distribution
samples = np.random.randn(1000)
# hist returns the per-bin counts and the bin edges (31 edges for 30 bins);
# the third return value (the drawn bar patches) is not needed here
bin_counts, bin_edges, _ = plt.hist(samples, bins=30)
# Midpoint of each bin: the average of its left and right edge
left_edges = bin_edges[:-1]
right_edges = bin_edges[1:]
midpoints = 0.5 * (right_edges + left_edges)
# Use the square root of each count as the size of its error bar
count_errors = np.sqrt(bin_counts)
# Overlay red markers with vertical error bars at the bin midpoints
plt.errorbar(midpoints, bin_counts, yerr=count_errors, fmt='o', color='red')
plt.show()
Output:
![Histograms in Matplotlib](https://apidemos.geek-docs.com/matplotlib/2024/06/24/20240608163708-15.png)
Overlaying Histogram with Kernel Density Estimate
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import gaussian_kde
# Generate 1000 samples from the standard normal distribution
data = np.random.randn(1000)
# Plot histogram and kernel density estimate.
# density=True puts the histogram on the same scale as the KDE curve
# (total bar area of 1), so the two can share one y-axis.
plt.hist(data, bins=30, density=True, alpha=0.7)
kde = gaussian_kde(data)
# Evaluate the KDE on 1000 evenly spaced points across the data range.
# Use the array's own min()/max() reductions rather than the Python builtins,
# which would iterate over the NumPy array one element at a time.
x = np.linspace(data.min(), data.max(), 1000)
plt.plot(x, kde(x), color='red')
plt.show()
Output:
![Histograms in Matplotlib](https://apidemos.geek-docs.com/matplotlib/2024/06/24/20240608163708-16.png)
3D Histogram
import matplotlib.pyplot as plt
import numpy as np
# Generate two independent sets of 1000 standard-normal samples
x = np.random.randn(1000)
y = np.random.randn(1000)
# Number of bins along each axis; every size below is derived from the
# histogram2d output, so changing this value needs no other edits
n_bins = 30
# Plot 3D histogram
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# hist[i, j] is the number of points falling in x-bin i and y-bin j
hist, xedges, yedges = np.histogram2d(x, y, bins=n_bins)
# Anchor each bar at the lower edges of its bin; indexing="ij" keeps the grid
# in the same (x-bin, y-bin) order as hist so the ravelled arrays line up
xpos, ypos = np.meshgrid(xedges[:-1], yedges[:-1], indexing="ij")
xpos = xpos.ravel()
ypos = ypos.ravel()
# All bars start at height zero
zpos = 0
# Bar footprints come from the actual bin widths, so each bar covers exactly
# its bin instead of relying on a hard-coded bar count and width
dx, dy = np.meshgrid(
    xedges[1:] - xedges[:-1],
    yedges[1:] - yedges[:-1],
    indexing="ij",
)
dx = dx.ravel()
dy = dy.ravel()
# Bar heights are the bin counts
dz = hist.ravel()
ax.bar3d(xpos, ypos, zpos, dx, dy, dz, zsort='average')
plt.show()
Output:
![Histograms in Matplotlib](https://apidemos.geek-docs.com/matplotlib/2024/06/24/20240608163708-17.png)
Hexbin (2D) Histogram
import matplotlib.pyplot as plt
import numpy as np
# Two independent sets of 1000 standard-normal samples used as point coordinates
x_values = np.random.randn(1000)
y_values = np.random.randn(1000)
# Bin the points into a hexagonal grid (30 hexagons across the x-direction)
# and colour each cell by the number of points it contains
hex_grid_size = 30
plt.hexbin(x_values, y_values, gridsize=hex_grid_size)
# Colour scale that maps cell colour to point count
plt.colorbar()
plt.show()
Output:
![Histograms in Matplotlib](https://apidemos.geek-docs.com/matplotlib/2024/06/24/20240608163708-18.png)
Summary
In this article, we have explored various ways to create and customize histograms using Matplotlib. Histograms are a powerful tool for visualizing the distribution of data and can help in gaining insights into the underlying patterns. Experiment with different parameters and customizations to create impactful visualizations for your data analysis projects.