Generating and Visualizing Multi-cluster Data with Gaussian KDE
This Python script generates synthetic two-dimensional data points for a specified number of clusters and visualizes them with Matplotlib using Gaussian kernel density estimation (KDE). It draws the estimated density as an image with the generated points overlaid on top, which makes the distribution of the points and the relationship between the clusters easy to see.
import numpy as np
from scipy import stats


def create_example_data(n, clusters=2):
    """Generate random 2-D sample points grouped into clusters.

    For each cluster index ``c`` two normal samples of length ``n`` are
    drawn, both centred on ``10 * c`` (standard deviations 1 and 0.5).
    They are mixed with two random coefficients drawn uniformly from
    ``[-1, 1)`` to form the x and y coordinates of that cluster.

    Args:
        n: Number of points to draw per cluster.
        clusters: Number of clusters to generate.

    Returns:
        A float array of shape ``(2, n * clusters)``. Row 0 holds the x
        coordinates and row 1 the y coordinates; column ``i`` is one point.
        Points are ordered cluster by cluster. With ``clusters=0`` the
        result is an empty ``(2, 0)`` array.
    """
    # Seeding with an empty (2, 0) block keeps hstack valid for zero clusters.
    blocks = [np.empty((2, 0))]
    for cluster in range(clusters):
        center = 10 * cluster
        m1 = np.random.normal(loc=center, size=n)
        m2 = np.random.normal(loc=center, scale=0.5, size=n)
        a = np.random.uniform(-1, 1)
        b = np.random.uniform(-1, 1)
        blocks.append(np.vstack([m1 + a * m2, m1 - b * m2]))
    # Join along the sample axis so that x[i] and y[i] always belong to the
    # same point; reshaping a row-stacked array would mix x and y values.
    return np.hstack(blocks)


def _density_on_grid(x, y, grid_size=100):
    """Evaluate a Gaussian KDE of the points (x, y) on a regular grid.

    The grid spans the bounding box of the data with ``grid_size`` steps
    per axis. Returns the density array (first axis x, second axis y) and
    the bounding box as ``[xmin, xmax, ymin, ymax]``.
    """
    xmin, xmax = x.min(), x.max()
    ymin, ymax = y.min(), y.max()
    # A complex step tells mgrid "this many points, endpoints included".
    steps = complex(0, grid_size)
    X, Y = np.mgrid[xmin:xmax:steps, ymin:ymax:steps]
    positions = np.vstack([X.ravel(), Y.ravel()])
    kernel = stats.gaussian_kde(np.vstack([x, y]))
    Z = np.reshape(kernel(positions).T, X.shape)
    return Z, [xmin, xmax, ymin, ymax]


def main():
    """Generate example data and show its density with the points overlaid."""
    # Imported here so create_example_data can be imported and used on
    # systems where no plotting backend is installed.
    import matplotlib.pyplot as plt

    x, y = create_example_data(1000)
    Z, extent = _density_on_grid(x, y)

    fig, ax = plt.subplots(1, 1, figsize=(10, 7))
    # Z is indexed [x, y]; rot90 turns it into image layout (rows run from
    # ymax down to ymin, columns from xmin to xmax) as imshow expects.
    ax.imshow(np.rot90(Z), extent=extent)
    ax.plot(x, y, 'o', alpha=0.1, zorder=1)
    plt.show()


if __name__ == "__main__":
    main()