import numpy as np


def _project_onto_simplex(v):
    """Return the Euclidean projection of the 1-D array *v* onto the probability simplex."""
    # Sort-and-threshold scheme: find the largest support size for which the
    # shifted entries stay positive, then clip everything below the threshold.
    sorted_desc = np.sort(v)[::-1]
    shifted_cumsum = np.cumsum(sorted_desc) - 1.0
    ranks = np.arange(1, v.size + 1)
    # The first entry always satisfies the condition, so `support` is non-empty.
    support = np.nonzero(sorted_desc - shifted_cumsum / ranks > 0)[0]
    rho = support[-1]
    theta = shifted_cumsum[rho] / (rho + 1)
    return np.maximum(v - theta, 0.0)


def convex_set_projection(x, y, *, max_iter=1000, epsilon=1e-7):
    """
    Project x onto the convex hull of the columns of y.

    Solves the problem:
        x' = argmin || x - z ||^2
               z
        s.t. z in conv(y)

    The hull is parameterized as z = y @ alpha with alpha on the probability
    simplex, and alpha is optimized by projected gradient descent with step
    size 1 / ||y||_2^2.

    INPUT:
        x: the vector to be projected; any array-like holding
           y.shape[0] values (e.g. shape (d,), (d, 1) or (1, d))
        y: 2-D matrix, each column of y is a vector in the convex set
        max_iter: maximum number of projected-gradient iterations
        epsilon: stop once the projected point moves less than this
                 (Euclidean norm) between two consecutive iterations
    OUTPUT:
        x': the projection, as a float array with the same shape as x
    RAISES:
        ValueError: if y is not a 2-D matrix with at least one column, or if
                    x does not hold exactly y.shape[0] values
    """
    x_arr = np.asarray(x, dtype=float)
    y_arr = np.asarray(y, dtype=float)
    if y_arr.ndim != 2 or y_arr.shape[1] == 0:
        raise ValueError("y must be a 2-D matrix with at least one column")
    n_features, n_points = y_arr.shape
    if x_arr.size != n_features:
        raise ValueError("x must contain exactly y.shape[0] values")

    target = x_arr.reshape(n_features)
    # Start from the barycenter of the hull's generating points.
    alpha = np.full(n_points, 1.0 / n_points)
    z = y_arr @ alpha

    # The gradient of 0.5 * ||y @ alpha - x||^2 is Lipschitz with constant
    # ||y||_2^2, so 1 / ||y||_2^2 is a safe fixed step size.
    lipschitz = np.linalg.norm(y_arr, 2) ** 2
    if lipschitz > 0:  # an all-zero y has the single-point hull {0}
        step = 1.0 / lipschitz
        for _ in range(max_iter):
            gradient = y_arr.T @ (z - target)
            alpha = _project_onto_simplex(alpha - step * gradient)
            z_next = y_arr @ alpha
            moved = np.linalg.norm(z_next - z)
            z = z_next
            if moved < epsilon:
                break

    return z.reshape(x_arr.shape)
POCS(Projections onto Convex Sets,凸集投影)算法是一种基于投影的迭代优化算法。给定一组凸集,目标是找到位于这些凸集交集中的一个点。POCS 算法通过依次向其中每一个凸集做投影来逼近该点;我们用一个循环反复执行这一过程,直到满足停止条件(例如相邻两次迭代的变化量小于给定阈值,或达到最大迭代次数)为止。POCS 能够有效地处理约束条件较强的问题。
def POCS_clustering(X, y, n_clusters, max_iter=100):
    """
    POCS clustering algorithm based on convex set projection.

    Algorithm:
        1. Initialize the centroids as the projections (via
           convex_set_projection) of randomly selected data points
        2. Repeat until the assignment stops changing or max_iter is reached:
            2.1 Assign every data point to its closest centroid
                (Euclidean distance)
            2.2 Update each centroid as the projection of the mean of the
                data points assigned to it; a cluster with no points keeps
                its previous centroid

    At least one assignment/update pass is always performed, even when
    max_iter <= 0.

    INPUT:
        X: data matrix, each row is a data point
        y: matrix, each column of y is a vector in the convex set; passed
           unchanged to convex_set_projection
        n_clusters: number of clusters
        max_iter: number of maximum iterations (optional parameter)
    OUTPUT:
        centroids: a matrix of size (n_clusters, n_features)
        labels: a vector of size (n_samples,) containing the cluster index
                for each data point
    RAISES:
        ValueError: if n_clusters is smaller than 1 or larger than the
                    number of data points
    """
    X = np.asarray(X)
    n_samples, n_features = X.shape
    if n_clusters < 1 or n_clusters > n_samples:
        raise ValueError("n_clusters must be between 1 and the number of data points")

    # Initialize the centroids from distinct, randomly chosen data points.
    centroids = np.zeros((n_clusters, n_features))
    rnd_indices = np.random.permutation(n_samples)[:n_clusters]
    for idx, sample_index in enumerate(rnd_indices):
        centroids[idx] = convex_set_projection(X[sample_index], y)

    labels = np.zeros(n_samples, dtype=int)
    previous_labels = None
    iteration = 0
    while True:
        # Assign all data points to the closest centroid in one vectorized step.
        distances = np.linalg.norm(
            X[:, np.newaxis, :] - centroids[np.newaxis, :, :], axis=2
        )
        labels = np.argmin(distances, axis=1).astype(int)

        # An unchanged assignment would reproduce the current centroids
        # exactly, so further passes cannot change the result.
        if previous_labels is not None and np.array_equal(labels, previous_labels):
            break

        # Update the centroids.
        for j in range(n_clusters):
            cluster_points = X[labels == j]
            if cluster_points.shape[0] == 0:
                # The mean of an empty cluster is NaN; keep the old centroid.
                continue
            centroids[j] = convex_set_projection(cluster_points.mean(axis=0), y)

        previous_labels = labels
        iteration += 1
        if iteration >= max_iter:
            break

    return centroids, labels