Hey everyone, I have three branches of points and I would like to split them into three groups, one group per branch, using K-means clustering.
I’ve tried to do this in GhPython (with ChatGPT’s help), but it doesn’t seem to give me the right clusters.
import random
from math import sqrt
# Helper function to calculate Euclidean distance
def distance(p1, p2):
    """Return the Euclidean distance between two 3D points.

    Args:
        p1: First point, an indexable (x, y, z) sequence of numbers.
        p2: Second point, laid out like ``p1``.

    Returns:
        The straight-line distance between ``p1`` and ``p2`` as a float.
    """
    dx = p1[0] - p2[0]
    dy = p1[1] - p2[1]
    dz = p1[2] - p2[2]
    return sqrt(dx ** 2 + dy ** 2 + dz ** 2)
# K-means++ initialization to pick initial centroids
def _squared_distance(p, c):
    """Return the squared Euclidean distance between two 3D points."""
    return (p[0] - c[0]) ** 2 + (p[1] - c[1]) ** 2 + (p[2] - c[2]) ** 2


def initialize_centroids_kmeans_pp(points, num_clusters):
    """Pick initial centroids using the K-means++ seeding strategy.

    The first centroid is drawn uniformly at random from ``points``. Each
    further centroid is drawn with probability proportional to the squared
    distance between a point and its nearest already-chosen centroid, so
    the seeds tend to be spread out.

    Args:
        points: Non-empty sequence of indexable (x, y, z) points.
        num_clusters: Number of centroids to pick.

    Returns:
        A list of centroids, each one an element of ``points``. The list
        has ``num_clusters`` entries when ``num_clusters >= 1``; a single
        centroid is returned for smaller values.

    Raises:
        IndexError: If ``points`` is empty (raised by ``random.choice``).
    """
    centroids = [random.choice(points)]  # Randomly pick the first centroid

    for _ in range(1, num_clusters):
        # Weight of each point: squared distance to its closest centroid.
        # Computed directly instead of squaring the result of a sqrt.
        weights = [
            min(_squared_distance(p, c) for c in centroids) for p in points
        ]
        total_weight = sum(weights)

        if total_weight <= 0:
            # Every point coincides with an existing centroid, so the
            # weighted draw is undefined; fall back to a uniform pick
            # rather than dividing by zero.
            centroids.append(random.choice(points))
            continue

        # Roulette-wheel selection over the weights.
        threshold = random.random() * total_weight
        cumulative = 0.0
        chosen = None
        for i, weight in enumerate(weights):
            cumulative += weight
            if weight > 0:
                # Remember the last selectable point so that a rounding
                # shortfall at the end of the wheel still yields a pick.
                chosen = i
            # Strict comparison: zero-weight points (already centroids)
            # can never be selected.
            if cumulative > threshold:
                break
        centroids.append(points[chosen])

    return centroids
# K-means algorithm with K-means++ initialization
def k_means(points, num_clusters, max_iterations=100):
    """Cluster 3D points with K-means, seeded by K-means++.

    Args:
        points: Non-empty sequence of indexable (x, y, z) points.
        num_clusters: Number of clusters; converted with ``int()``.
        max_iterations: Upper bound on assign/update rounds.

    Returns:
        A ``(clusters, centroids)`` pair. ``clusters`` is a list of
        ``num_clusters`` lists holding indices into ``points``;
        ``centroids`` is a list of ``num_clusters`` centroid coordinates.

    Raises:
        ValueError: If ``num_clusters`` is less than 1 or ``points`` is
            empty.
    """
    num_clusters = int(num_clusters)  # Ensure num_clusters is an integer
    if num_clusters < 1:
        raise ValueError("num_clusters must be at least 1.")
    if len(points) == 0:
        raise ValueError("points must contain at least one point.")

    # Initialize centroids using K-means++
    centroids = initialize_centroids_kmeans_pp(points, num_clusters)
    # Returned unchanged when max_iterations is 0.
    clusters = [[] for _ in range(num_clusters)]

    for _ in range(max_iterations):
        # Step 1: Assign points to the nearest centroid
        clusters = [[] for _ in range(num_clusters)]
        for i, p in enumerate(points):
            distances = [distance(p, c) for c in centroids]
            clusters[distances.index(min(distances))].append(i)

        # Step 2: Update centroids as the mean of their clusters
        new_centroids = []
        for cluster_idx, cluster in enumerate(clusters):
            if cluster:
                members = [points[i] for i in cluster]
                # float() keeps the mean exact under Python 2 semantics,
                # where int / int would truncate.
                size = float(len(members))
                new_centroids.append(
                    tuple(sum(axis) / size for axis in zip(*members))
                )
            else:
                # Empty cluster: keep its own previous centroid. The index
                # comes from enumerate because list.index() would return
                # the first empty cluster for every empty cluster.
                new_centroids.append(centroids[cluster_idx])

        # Stop if centroids do not change
        if new_centroids == centroids:
            break
        centroids = new_centroids

    return clusters, centroids
# NOTE(review): x, y, z and k are not defined in this script; presumably
# they are the GhPython component inputs, and a / b its outputs - confirm.

# Ensure inputs are lists of numbers
if not isinstance(x, list) or not isinstance(y, list) or not isinstance(z, list):
    raise ValueError("Inputs x, y, and z must be lists of numbers.")
# Mismatched lengths would otherwise pair coordinates incorrectly.
if not (len(x) == len(y) == len(z)):
    raise ValueError("Inputs x, y, and z must have the same length.")

# Combine x, y, z inputs into a list of points
points = list(zip(x, y, z))

# Ensure k is an integer
k = int(k)

# Perform K-means clustering
clusters, centroids = k_means(points, k)

# Outputs
a = clusters  # Clustered point indices
b = centroids  # Centroid coordinates

print("Final Clusters:", clusters)
print("Final Centroids:", centroids)
print("x-values:", x)
print("y-values:", y)
print("z-values:", z)