3.聚类–K-means的Java实现

java

K-means的步骤

输入: 含 n 个样本的数据集,簇的数目 K

输出: K 个簇

算法步骤:

1. 初始化 K 个簇类中心 C1, C2, ……, Ck (通常随机选择)

2.repeat 步骤3,4

3. 将数据集中的每个样本分配到与之最近的中心 Ci 所对应的簇(第 i 个簇);

4. 更新聚类中心Ci,即计算各个簇的样本均值;

5. 直到样本分配不再改变

上代码:

import java.lang.annotation.ElementType;

import java.lang.annotation.Retention;

import java.lang.annotation.RetentionPolicy;

import java.lang.annotation.Target;

/**
 * Marker annotation for the fields of a bean that should take part in the
 * k-means computation. Only numeric attributes are supported.
 *
 * <p>The annotation is retained at runtime so that it can be discovered
 * through reflection, and it may only be placed on fields.
 *
 * @author 阿飞哥
 */
@Target({ElementType.FIELD})
@Retention(value = RetentionPolicy.RUNTIME)
public @interface KmeanField {
}

import java.lang.annotation.Annotation;

import java.lang.reflect.Field;

import java.lang.reflect.Method;

import java.util.ArrayList;

import java.util.List;

/**
 * Reflection-based k-means clustering over plain Java beans.
 *
 * <p>The dimensions used for clustering are the fields of the sample class
 * that carry the {@code KmeanField} annotation. Each such field {@code foo}
 * is read through a getter named {@code getFoo()} and written through a
 * setter named {@code setFoo(double)}; both are looked up with
 * {@code getDeclaredMethod} on the runtime class of the object, so they must
 * be declared on that class itself. New cluster centers are created through
 * the no-argument constructor of the sample class.
 *
 * <p>The first {@code k} samples of the input list seed the cluster centers.
 *
 * @param <T> the sample type
 * @author 阿飞哥
 */
public class Kmeans<T> {

    /** All samples to be clustered. */
    private List<T> players = new ArrayList<T>();

    /** Runtime class of the samples; used to instantiate new centers. */
    private Class<T> classT;

    /** Current cluster centers (initially the first k samples). */
    private List<T> initPlayers = new ArrayList<T>();

    /** Names of the fields that take part in the k-means computation. */
    private List<String> fieldNames = new ArrayList<String>();

    /** Number of clusters. */
    private int k = 1;

    /**
     * Creates an instance without any data. {@link #comput()} on such an
     * instance returns a single empty cluster.
     */
    public Kmeans() {
    }

    /**
     * Creates a clusterer for the given samples.
     *
     * @param list the samples; must be non-empty and its first element must
     *             be non-null (its runtime class defines the sample class)
     * @param k    the number of clusters; must satisfy
     *             {@code 1 <= k <= list.size()} because the first k samples
     *             are used as the initial centers
     * @throws IllegalArgumentException if any of the constraints above is violated
     */
    public Kmeans(List<T> list, int k) {
        if (list == null || list.isEmpty()) {
            throw new IllegalArgumentException("list must contain at least one sample");
        }
        if (k < 1 || k > list.size()) {
            throw new IllegalArgumentException(
                    "k must be between 1 and " + list.size() + " but was " + k);
        }
        T first = list.get(0);
        if (first == null) {
            throw new IllegalArgumentException("the first sample must not be null");
        }

        this.players = list;
        this.k = k;

        // The samples are of type T, so the runtime class is usable as Class<T>.
        @SuppressWarnings("unchecked")
        Class<T> sampleClass = (Class<T>) first.getClass();
        this.classT = sampleClass;

        Field[] fields = this.classT.getDeclaredFields();
        // Diagnostic output kept unchanged from the original implementation.
        System.out.println("fields---------------------------------------------="+fields.length);
        for (Field field : fields) {
            if (field.isAnnotationPresent(KmeanField.class)) {
                fieldNames.add(field.getName());
                System.out.println("fieldNames.add"+ field.getName());
            }
        }

        // Seed the centers with the first k samples.
        initPlayers = new ArrayList<T>();
        for (int i = 0; i < k; i++) {
            initPlayers.add(players.get(i));
        }
    }

    /**
     * Runs k-means until no cluster center changes any more.
     *
     * <p>Each round assigns every sample to the nearest current center and
     * then replaces each center by the mean of its cluster. A cluster that
     * received no samples, or whose new center could not be built, keeps its
     * previous center instead of being reset.
     *
     * @return an array of k lists; element i holds the samples of cluster i
     */
    public List<T>[] comput() {
        List<T>[] results = newClusters();
        // Nothing to cluster (e.g. default-constructed instance): return empty clusters.
        if (players.isEmpty() || initPlayers.size() < k) {
            return results;
        }

        boolean centerchange = true;
        while (centerchange) {
            centerchange = false;
            results = newClusters();

            // Assignment step: put each sample into the cluster of its nearest center.
            for (T p : players) {
                double[] dists = new double[k];
                for (int j = 0; j < k; j++) {
                    dists[j] = distance(initPlayers.get(j), p);
                }
                results[computOrder(dists)].add(p);
            }

            // Update step: move each center to the mean of its cluster.
            for (int i = 0; i < k; i++) {
                if (results[i].isEmpty()) {
                    // An empty cluster has no mean; keep the old center.
                    continue;
                }
                T player_new = findNewCenter(results[i]);
                if (player_new == null) {
                    // Center construction failed (already reported); keep the old center.
                    continue;
                }
                if (!IsPlayerEqual(player_new, initPlayers.get(i))) {
                    centerchange = true;
                    initPlayers.set(i, player_new);
                }
            }
        }
        return results;
    }

    /**
     * Tells whether two objects agree on every clustered field.
     *
     * <p>Values are compared null-safely: two null values count as equal.
     *
     * @param p1 the first object, may be null
     * @param p2 the second object, may be null
     * @return true if both references are identical or all clustered field
     *         values are equal; false otherwise, or if the comparison fails
     */
    public boolean IsPlayerEqual(T p1, T p2) {
        if (p1 == p2) {
            return true;
        }
        if (p1 == null || p2 == null) {
            return false;
        }
        try {
            for (String fieldName : fieldNames) {
                String getName = accessorName("get", fieldName);
                Object value1 = invokeMethod(p1, getName, null);
                Object value2 = invokeMethod(p2, getName, null);
                boolean same = (value1 == null) ? (value2 == null) : value1.equals(value2);
                if (!same) {
                    return false;
                }
            }
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Builds a new center whose clustered fields hold the per-field mean of
     * the given samples.
     *
     * @param ps the samples of one cluster
     * @return a fresh instance carrying the means; an untouched fresh
     *         instance if {@code ps} is null or empty; or null if the
     *         instance could not be created or populated
     */
    public T findNewCenter(List<T> ps) {
        try {
            T t = classT.getDeclaredConstructor().newInstance();
            if (ps == null || ps.isEmpty()) {
                return t;
            }

            // Sum every clustered field over all samples (null counts as 0).
            double[] ds = new double[fieldNames.size()];
            for (T vo : ps) {
                for (int i = 0; i < fieldNames.size(); i++) {
                    String getName = accessorName("get", fieldNames.get(i));
                    ds[i] += toDouble(invokeMethod(vo, getName, null));
                }
            }

            // Turn the sums into means and store them on the new center.
            for (int i = 0; i < fieldNames.size(); i++) {
                ds[i] = ds[i] / ps.size();
                String setName = accessorName("set", fieldNames.get(i));
                // Diagnostic output kept unchanged from the original implementation.
                System.out.println("ds[i] ++="+ds[i]+"----ps.size()"+ps.size());
                invokeMethod(t, setName, new Class<?>[] {double.class}, ds[i]);
            }
            return t;
        } catch (Exception ex) {
            ex.printStackTrace();
        }
        return null;
    }

    /**
     * Finds the index of the smallest distance.
     *
     * @param dists the distances to each center
     * @return the index of the first minimum, or 0 if the array is empty
     */
    public int computOrder(double[] dists) {
        int index = 0;
        for (int i = 1; i < dists.length; i++) {
            // Strict comparison: on ties the lowest index wins.
            if (dists[i] < dists[index]) {
                index = i;
            }
        }
        return index;
    }

    /**
     * Computes the Euclidean distance between two objects over the clustered
     * fields. A null field value counts as 0, matching
     * {@link #findNewCenter(List)}.
     *
     * @param p0 the first object
     * @param p1 the second object
     * @return the distance; if reading the fields fails, the failure is
     *         printed and the distance accumulated so far is returned
     */
    public double distance(T p0, T p1) {
        double dis = 0;
        try {
            for (String fieldName : fieldNames) {
                String getName = accessorName("get", fieldName);
                double field0Value = toDouble(invokeMethod(p0, getName, null));
                double field1Value = toDouble(invokeMethod(p1, getName, null));
                dis += Math.pow(field0Value - field1Value, 2);
            }
        } catch (Exception ex) {
            ex.printStackTrace();
        }
        return Math.sqrt(dis);
    }

    /*------ shared helpers -----*/

    /**
     * Reflectively invokes a method declared on the runtime class of
     * {@code owner}.
     *
     * @param owner      the target object; must not be null
     * @param methodName the name of the method to call
     * @param argsClass  the parameter types, or null for a no-argument method
     * @param args       the arguments to pass
     * @return the method's return value, or null if the method could not be
     *         found or invoked (the failure is printed)
     */
    public Object invokeMethod(Object owner, String methodName, Class<?>[] argsClass,
            Object... args) {
        Class<?> ownerClass = owner.getClass();
        try {
            Method method = ownerClass.getDeclaredMethod(methodName, argsClass);
            return method.invoke(owner, args);
        } catch (Exception ex) {
            ex.printStackTrace();
        }
        return null;
    }

    /** Creates k fresh, empty cluster lists. */
    private List<T>[] newClusters() {
        // Generic arrays cannot be created directly; every slot holds an ArrayList<T>.
        @SuppressWarnings("unchecked")
        List<T>[] clusters = new ArrayList[k];
        for (int i = 0; i < k; i++) {
            clusters[i] = new ArrayList<T>();
        }
        return clusters;
    }

    /** Builds a bean accessor name such as {@code getGoal} from a prefix and a field name. */
    private static String accessorName(String prefix, String fieldName) {
        return prefix + fieldName.substring(0, 1).toUpperCase() + fieldName.substring(1);
    }

    /** Converts a reflectively read value to a double via its string form; null becomes 0. */
    private static double toDouble(Object value) {
        return value == null ? 0 : Double.parseDouble(String.valueOf(value));
    }

}

/**
 * Sample bean used to demonstrate the k-means implementation.
 *
 * <p>Only the fields annotated with {@code KmeanField} take part in the
 * clustering; currently that is {@code goal} alone. Annotate further numeric
 * fields to add them as clustering dimensions.
 */
public class Player {

    private int id;

    private String name;

    private int age;

    /* Points scored. */
    @KmeanField
    private double goal;

    /* Assists (not clustered unless annotated with @KmeanField). */
    private double assists;

    /* Rebounds (not clustered unless annotated with @KmeanField). */
    private double backboard;

    /* Steals (not clustered unless annotated with @KmeanField). */
    private double steals;

    public int getId() {
        return id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public int getAge() {
        return age;
    }

    public void setAge(int age) {
        this.age = age;
    }

    public double getGoal() {
        return goal;
    }

    public void setGoal(double goal) {
        this.goal = goal;
    }

    public double getAssists() {
        return assists;
    }

    public void setAssists(double assists) {
        this.assists = assists;
    }

    public double getBackboard() {
        return backboard;
    }

    public void setBackboard(double backboard) {
        this.backboard = backboard;
    }

    public double getSteals() {
        return steals;
    }

    public void setSteals(double steals) {
        this.steals = steals;
    }

    /**
     * Returns the player's name.
     *
     * @return the name, or the string {@code "null"} when no name has been
     *         set, so that this method never returns a null reference
     */
    @Override
    public String toString() {
        return String.valueOf(name);
    }

}

 

import java.util.ArrayList;

import java.util.List;

import java.util.Random;

/**
 * Demo entry point: clusters nine players into two groups by their
 * {@code goal} value and prints each resulting cluster.
 */
public class TestMain {

    public static void main(String[] args) {
        // Goal values of the sample players afei1 .. afei9.
        double[] goals = {1, 2, 3, 7, 8, 25, 26, 27, 28};

        List<Player> listPlayers = new ArrayList<Player>();
        for (int i = 0; i < goals.length; i++) {
            Player player = new Player();
            player.setName("afei" + (i + 1));
            player.setGoal(goals[i]);
            listPlayers.add(player);
        }
        // The first player additionally carries an assists value, as in the original data.
        listPlayers.get(0).setAssists(8);

        Kmeans<Player> kmeans = new Kmeans<Player>(listPlayers, 2);
        List<Player>[] results = kmeans.comput();

        for (int i = 0; i < results.length; i++) {
            System.out.println("===========类别" + (i + 1) + "================");
            for (Player p : results[i]) {
                System.out.println(p.getName() + "--->"
                        + p.getGoal() + "," + p.getAssists() + ","
                        + p.getSteals() + "," + p.getBackboard());
            }
        }
    }

}

源码:https://github.com/chaoren399/dkdemo/tree/master/kmeans/src

以上是 3.聚类–K-means的Java实现 的全部内容, 来源链接: utcz.com/z/391135.html

回到顶部