目录
实验要求
- 掌握K-means算法的原理。
- 学会编写K-means的程序实现和应用。
- 理解和掌握通用框架的实现原则与技巧。
- 在选取初始聚类中心的时候要做一些工作,不要使得聚类中心集中到一个簇之中。
实验内容
实验背景介绍
K-means是机器学习中重要的算法之一,有许多版本变种和应用场景。该算法的执行效率较高,应用的结果可展示性较好。
[TencentCloudSDKException] code:FailedOperation.ServiceIsolate message:service is stopped due to arrears, please recharge your account in Tencent Cloud requestId:1646d785-7a89-4f1b-bd6c-bbc8d8c7c294
[En]
[TencentCloudSDKException] code:FailedOperation.ServiceIsolate message:service is stopped due to arrears, please recharge your account in Tencent Cloud requestId:be004979-5709-459a-ac8c-49f3f8ae7f36
[TencentCloudSDKException] code:FailedOperation.ServiceIsolate message:service is stopped due to arrears, please recharge your account in Tencent Cloud requestId:f139cf69-8fb8-4242-bb30-db5668c684b2
[En]
[TencentCloudSDKException] code:FailedOperation.ServiceIsolate message:service is stopped due to arrears, please recharge your account in Tencent Cloud requestId:f5d62bd2-76ae-4899-a17f-99a0c9e49b68
[TencentCloudSDKException] code:FailedOperation.ServiceIsolate message:service is stopped due to arrears, please recharge your account in Tencent Cloud requestId:c691d985-0d54-4cdc-a66c-ee20384a3e8a
[En]
[TencentCloudSDKException] code:FailedOperation.ServiceIsolate message:service is stopped due to arrears, please recharge your account in Tencent Cloud requestId:5b289fa8-e1ed-4443-a2dd-4859039e65a5
public interface KmObj extends Comparable<KmObj> {
public double getDistance(KmObj other); //求两点间的距离
public int compareTo(KmObj other); //点排序,为了核对聚类中心是否重合
public KmObj getCenter(KmObj[] objs, int[] serials); // 通过serials选定标号,求它们的中心
}
[TencentCloudSDKException] code:FailedOperation.ServiceIsolate message:service is stopped due to arrears, please recharge your account in Tencent Cloud requestId:dc5b627a-48f2-4d98-86da-b32b2cc3825e
[En]
[TencentCloudSDKException] code:FailedOperation.ServiceIsolate message:service is stopped due to arrears, please recharge your account in Tencent Cloud requestId:70a2dc7a-0307-43fd-b65d-183fa6d4c2f9
public static int[][] kmean(KmObj[] objs, int k)
2.输入输出及具体要求
(1) 界面打点聚类
该系统的界面如图1所示,你可以增加另外的辅助功能,如“清除所有的点和聚类中心”等。
(2)图像像素聚类压缩
[TencentCloudSDKException] code:FailedOperation.ServiceIsolate message:service is stopped due to arrears, please recharge your account in Tencent Cloud requestId:98200130-6f4e-4a42-bf26-c73ad57e0725
[En]
[TencentCloudSDKException] code:FailedOperation.ServiceIsolate message:service is stopped due to arrears, please recharge your account in Tencent Cloud requestId:b7c49fb0-dde2-487e-b5a4-390cf13a1b74
实现方法:
我使用python对k-means算法进行了简单的实现。我在p1.py(打点求中心)和p2.py(图像压缩)中分别使用这个算法对二维的坐标数据和三维的RGB数据进行了处理。
首先是p1.py。我使用tkinter包实现了简单的打点界面。具体代码如下:
from tkinter import *
import random
# Size of the drawing canvas in pixels (width, height).
canvas_width, canvas_height = 1000, 500
def paint(event):
    """Draw a small dot at the pointer position and remember the point.

    The dot is a 2x2 pixel oval centred on ``(event.x, event.y)`` drawn on
    the module-level canvas ``w``; the coordinates are appended to the
    module-level ``points`` list.
    """
    radius = 1
    cx, cy = event.x, event.y
    w.create_oval(
        cx - radius, cy - radius, cx + radius, cy + radius, fill="#476042"
    )
    points.append((cx, cy))
def distance(x, y):
    """Return the Euclidean distance between two coordinate sequences.

    Every component of *x* is paired with the component of *y* at the same
    index, so *y* must be at least as long as *x*.
    """
    squared = 0
    for idx, x_val in enumerate(x):
        squared += (x_val - y[idx]) ** 2
    return squared ** 0.5
def k_means(event):
    """Run k-means on the clicked points and mark the final centers.

    Intended as a Tk event callback: it receives the event object but does
    not read it. The cluster count is taken from the ``ju_num`` entry
    variable. Problems with the input are reported through ``hint_str`` and
    abort the run. On success the final centers are drawn on the canvas
    ``w``, a confirmation is shown and the trigger button is disabled.

    The iteration stops when the centers no longer change or after 50
    rounds, whichever comes first.
    """
    try:
        group_num = int(ju_num.get())
    except ValueError:
        group_num = 0
    if group_num < 1:
        # Non-numeric text used to raise inside the callback, and a count of
        # zero or less used to report success without computing anything.
        hint_str.set('聚类数目必须是正整数!请检查输入是否有误')
        return

    point = points.copy()
    if len(point) < group_num:
        hint_str.set('您的点个数比分类数还少!请检查输入是否有误')
        return

    # One sample is enough: it already yields group_num distinct points.
    center = random.sample(point, group_num)
    new_center = []
    for _ in range(50):
        # Assign every point to its nearest center (ties go to the first).
        groups = [[] for _ in center]
        for p in point:
            distances = [distance(p, c) for c in center]
            groups[distances.index(min(distances))].append(p)

        new_center = []
        for group in groups:
            if group:
                mean_x = sum(p[0] for p in group) / len(group)
                mean_y = sum(p[1] for p in group) / len(group)
                new_center.append((mean_x, mean_y))
            else:
                # An empty cluster has no mean; fall back to the canvas middle.
                print('分组出现空,计算可能有误')
                new_center.append((canvas_width / 2, canvas_height / 2))

        if new_center == center:
            break
        center = new_center

    for cx, cy in new_center:
        w.create_oval(cx - 3, cy - 3, cx + 3, cy + 3, fill="#00FF7F")
    hint_str.set('聚类中心已计算并标出!')
    button['state'] = DISABLED
if __name__ == '__main__':
    # Build the demo window: a canvas for clicking points, a hint line, and a
    # bottom row with the cluster-count entry and the trigger button.
    points = []
    root = Tk()
    root.title("K-means聚类算法演示系统")

    w = Canvas(root, width=canvas_width, height=canvas_height)
    w.pack(expand=YES, fill=BOTH)
    # The event sequence was an empty string, which is not a valid binding.
    # The hint below tells the user to click, so bind the left mouse button.
    w.bind("<Button-1>", paint)

    hint_str = StringVar()
    hint_str.set('点击窗口任意位置以绘制点')
    hint = Label(root, textvariable=hint_str)
    hint.pack(side=TOP)

    bottom_frame = Frame(root)
    bottom_frame.pack(side=BOTTOM)
    message = Label(bottom_frame, text="请输入最终的聚类数目:")
    message.grid(column=0, row=0)
    ju_num = StringVar()
    ju_num.set('3')
    entry = Entry(bottom_frame, textvariable=ju_num)
    entry.grid(column=1, row=0)
    button = Button(bottom_frame, text='K-means!')
    button.grid(column=2, row=0)
    # k_means takes the event object, so it is attached with bind(); the
    # sequence here was empty as well and is restored to a left click.
    button.bind('<Button-1>', k_means)

    mainloop()
然后是p2.py。我使用PIL包读取了源文件同目录下的trump.jpeg,经过处理后将结果输出在同目录下的result.jpg中。具体代码如下:
from PIL import Image
import numpy as np
import random
def write_image(array, path):
    """Build an image from *array* and save it to *path*."""
    Image.fromarray(array).save(path)
def distance(x, y):
    """Return the Euclidean distance between two colour vectors.

    Each component of both vectors is converted with ``int()`` before
    subtracting, so fractional parts are dropped. Every component of *x* is
    paired with the component of *y* at the same index.
    """
    squared = 0
    for idx, x_val in enumerate(x):
        diff = int(x_val) - int(y[idx])
        squared += diff ** 2
    return squared ** 0.5
def _nearest_center(pixels, centers):
    """Return, for each pixel row, the index of its closest center.

    Center coordinates are truncated to integers before comparing, and a
    pixel that is equally close to several centers gets the lowest index.
    """
    labels = np.zeros(len(pixels), dtype=np.intp)
    best = None
    for idx, center in enumerate(centers):
        offset = pixels - np.array([int(v) for v in center], dtype=np.int64)
        dist = (offset * offset).sum(axis=1)
        if best is None:
            best = dist
            continue
        closer = dist < best  # strict, so earlier centers win ties
        labels[closer] = idx
        best = np.where(closer, dist, best)
    return labels


def k_means(pic_array, group_num):
    """Reduce an image to ``group_num`` colours with k-means, in place.

    Args:
        pic_array: NumPy array of shape (height, width, channels) with at
            least three channels. The first three channels of every pixel
            are overwritten with the colour of the nearest final center;
            any further channels are left untouched.
        group_num: Number of colour clusters, at least 1.

    Returns:
        The same ``pic_array`` object, after modification.

    Raises:
        ValueError: If ``group_num`` is below 1, the array does not have the
            expected shape, or there are fewer pixels than clusters.

    Initial centers are ``group_num`` randomly sampled pixels. The iteration
    stops when the centers no longer change or after 50 rounds. A cluster
    that ends up empty is given the colour (100, 100, 100).
    """
    if group_num < 1:
        raise ValueError('group_num must be at least 1')
    if pic_array.ndim != 3 or pic_array.shape[2] < 3:
        raise ValueError('pic_array must have shape (height, width, channels>=3)')

    # Widen before any arithmetic: summing raw uint8 channel values can wrap
    # around at 255 and corrupt the cluster means.
    pixels = pic_array[..., :3].reshape(-1, 3).astype(np.int64)

    # One sample is enough: it already yields group_num distinct pixels.
    center = random.sample(pixels.tolist(), group_num)
    new_center = center
    for _ in range(50):
        labels = _nearest_center(pixels, center)
        new_center = []
        for idx in range(group_num):
            members = pixels[labels == idx]
            if len(members):
                new_center.append((members.sum(axis=0) / len(members)).tolist())
            else:
                print('分组出现空,计算可能有误')
                new_center.append([100, 100, 100])
        if new_center == center:
            break
        center = new_center

    # Recolour every pixel with the (truncated) colour of its nearest center.
    palette = np.array(
        [[int(v) for v in c] for c in new_center], dtype=np.int64
    )
    labels = _nearest_center(pixels, new_center)
    pic_array[..., :3] = palette[labels].reshape(pic_array.shape[:2] + (3,))
    print('k—means已完成')
    return pic_array
if __name__ == '__main__':
    # Read trump.jpeg from the working directory, quantize its colours with
    # k-means and write the result to result.jpg.
    image_path = 'trump.jpeg'
    # Open in a context manager so the file is closed once the pixels have
    # been copied. Convert to RGB because k_means indexes three channels per
    # pixel and the result is saved as a JPEG.
    with Image.open(image_path) as image:
        image_array = np.array(image.convert('RGB'))
    group_num = int(input('分组数:'))
    new_array = k_means(image_array, group_num)
    print('生成图片输出中...')
    write_image(new_array, 'result.jpg')
    print('输出完成')
实验结果
Original: https://blog.csdn.net/xiangQiAtCSDN/article/details/117434009
Author: ChessZH
Title: 数据挖掘实验二:K-means算法及其应用(Python实现)
原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/561754/
转载文章受原作者版权保护。转载请注明原作者出处!