from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics
# --- Load the Iris measurements ----------------------------------------------
iris = load_iris()
data = iris.data

# Raw (unscaled) columns 0 and 2, kept for the scatter plot further down,
# where they are labelled as sepal length and petal length.
x, y = data[:, 0], data[:, 2]

# --- Standardize, then cluster -----------------------------------------------
# K-means is distance based, so every feature is standardized first.
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data)

# Fixed random_state keeps the clustering reproducible between runs.
kmeans = KMeans(n_clusters=3, random_state=42)
labels = kmeans.fit(data_scaled).predict(data_scaled)

# --- Report cluster statistics -----------------------------------------------
# np.unique returns (unique_values, counts); only the counts are needed.
_, cluster_sizes = np.unique(labels, return_counts=True)
print("每个簇的样本数量:", cluster_sizes)

# NOTE: these centroids live in the standardized feature space, because the
# model was fitted on data_scaled.
cluster_centers = kmeans.cluster_centers_
print("每个簇的质心:\n", cluster_centers)

# Within-cluster sum of squared distances (computed on the scaled data).
sse = kmeans.inertia_
print("SSE:", sse)

# Silhouette coefficient of the labelling, also on the scaled data.
silhouette_score = metrics.silhouette_score(data_scaled, labels)
print("轮廓系数:", silhouette_score)
# --- Plot the clustering result ----------------------------------------------
# Configure fonts before anything is drawn so that the Chinese axis labels
# and title render. 'Arial Unicode MS' stays the first choice; the remaining
# entries are fallbacks for systems where that font is not installed.
plt.rcParams['font.family'] = ['sans-serif']
plt.rcParams['font.sans-serif'] = [
    'Arial Unicode MS',
    'SimHei',
    'Microsoft YaHei',
    'Noto Sans CJK SC',
    'DejaVu Sans',
]
# Use the ASCII hyphen for negative tick labels; some CJK fonts lack the
# Unicode minus glyph.
plt.rcParams['axes.unicode_minus'] = False

# Samples are drawn in the original measurement units (raw columns 0 and 2),
# coloured by their assigned cluster.
plt.scatter(x, y, c=labels, cmap='viridis')

# The model was fitted on standardized data, so its centroids are in
# standardized units. Map them back to the original units so the markers
# land on the same axes as the samples.
centers_original = scaler.inverse_transform(cluster_centers)
plt.scatter(
    centers_original[:, 0],
    centers_original[:, 2],
    marker='x',
    c='red',
    s=200,
)

plt.xlabel('花萼长度')
plt.ylabel('花瓣长度')
plt.title('K-平均算法聚类结果')
plt.show()