Académique Documents
Professionnel Documents
Culture Documents
import math
NUM_CLUSTERS = 2
TOTAL_DATA = 7
LOWEST_SAMPLE_POINT = 0 #element 0 of SAMPLES.
HIGHEST_SAMPLE_POINT = 3 #element 3 of SAMPLES.
BIG_NUMBER = math.pow(10, 10)
SAMPLES = [[1.0, 1.0], [1.5, 2.0], [3.0, 4.0], [5.0, 7.0], [3.5, 5.0], [4.5,
5.0], [3.5, 4.5]]
data = []
centroids = []
class DataPoint:
def __init__(self, x, y):
self.x = x
self.y = y
def get_x(self):
return self.x
def get_y(self):
return self.y
def get_cluster(self):
return self.clusterNumber
class Centroid:
def __init__(self, x, y):
self.x = x
self.y = y
def get_x(self):
return self.x
def get_y(self):
return self.y
def initialize_centroids():
# Set the centoid coordinates to match the data points furthest from each
other.
# In this example, (1.0, 1.0) and (5.0, 7.0)
centroids.append(Centroid(SAMPLES[LOWEST_SAMPLE_POINT][0],
SAMPLES[LOWEST_SAMPLE_POINT][1]))
centroids.append(Centroid(SAMPLES[HIGHEST_SAMPLE_POINT][0],
SAMPLES[HIGHEST_SAMPLE_POINT][1]))
def initialize_datapoints():
# DataPoint objects' x and y values are taken from the SAMPLE array.
# The DataPoints associated with LOWEST_SAMPLE_POINT and
HIGHEST_SAMPLE_POINT are initially
# assigned to the clusters matching the LOWEST_SAMPLE_POINT and
HIGHEST_SAMPLE_POINT centroids.
for i in range(TOTAL_DATA):
newPoint = DataPoint(SAMPLES[i][0], SAMPLES[i][1])
if(i == LOWEST_SAMPLE_POINT):
newPoint.set_cluster(0)
elif(i == HIGHEST_SAMPLE_POINT):
newPoint.set_cluster(1)
else:
newPoint.set_cluster(None)
data.append(newPoint)
return
def recalculate_centroids():
totalX = 0
totalY = 0
totalInCluster = 0
for j in range(NUM_CLUSTERS):
for k in range(len(data)):
if(data[k].get_cluster() == j):
totalX += data[k].get_x()
totalY += data[k].get_y()
totalInCluster += 1
def update_clusters():
isStillMoving = 0
for i in range(TOTAL_DATA):
bestMinimum = BIG_NUMBER
currentCluster = 0
for j in range(NUM_CLUSTERS):
distance = get_distance(data[i].get_x(), data[i].get_y(),
centroids[j].get_x(), centroids[j].get_y())
if(distance < bestMinimum):
bestMinimum = distance
currentCluster = j
data[i].set_cluster(currentCluster)
return isStillMoving
def perform_kmeans():
isStillMoving = 1
initialize_centroids()
initialize_datapoints()
while(isStillMoving):
recalculate_centroids()
isStillMoving = update_clusters()
return
def print_results():
for i in range(NUM_CLUSTERS):
print("Cluster ", i, " includes:")
for j in range(TOTAL_DATA):
if(data[j].get_cluster() == i):
print("(", data[j].get_x(), ", ", data[j].get_y(), ")")
print()
return
perform_kmeans()
print_results()