Knn的思路为:
1. 首先将需要处理的信息项(如一部电影、一篇文章)做数值化处理,即用一组特征值来表示它。
以电影为例:一部电影包含的打斗镜头数量,一部电影包含的接吻镜头数量。 通过这两个特征来评价一部电影是动作片还是爱情片。
2. 对已知标签的数据集合中的每一个数据点(由上述特征值表示),计算它与待评估数据点之间的距离。
3.选择与目标点距离最近的k个点。
4. 查看这 k 个点的标签,统计其中出现次数最多的一个(或并列的几个)标签,将其作为待评估数据的标签。
所以简单的程序如下:
package org.algorithm.knn;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
public class Knn {
private int[] unknowmovie;
private int k;
// 已知数据集
private List<MoveMeta> movielist;
public static void main(String[] args) {
<span style="white-space:pre"> </span>//此处为测试数据
Knn kn = new Knn(18, 90, 3);
kn.init();
kn.findkpoint();
kn.findtype();
}
public void init() {
// 已知类别数据
movielist = new ArrayList<MoveMeta>();
<span style="white-space:pre"> </span>//数据说明:打斗镜头&接吻镜头数&影片类型
movielist.add(new MoveMeta(3, 104, "爱情"));
movielist.add(new MoveMeta(2, 100, "爱情"));
movielist.add(new MoveMeta(1, 81, "爱情"));
movielist.add(new MoveMeta(101, 10, "动作"));
movielist.add(new MoveMeta(99, 5, "动作"));
movielist.add(new MoveMeta(98, 2, "动作"));
}
public Knn(int a1, int a2, int k) {
this.k = k;
this.unknowmovie = new int[] { a1, a2 };
}
/**
* 目前写死为2维的数据
*
* @param a
* @param b
* @return
*/
public double distance(int[] a, int[] b) {
double tt = Math.sqrt(Math.pow((a[0] - b[0]), 2)
+ Math.pow((a[1] - b[1]), 2));
System.out.println(tt);
return tt;
}
/**
* 找到距离最近的前k个点
*/
public void findkpoint() {
for (MoveMeta mm : movielist) {
mm.setBetweentheunknow(distance(mm.getPoint(), unknowmovie));
}
// 升序
MoveMeta comparator = new MoveMeta();
Collections.sort(movielist, comparator);
}
/**
* 找到类别出现次数最多的
*/
public void findtype() {
Map<String, Integer> typetimes = new HashMap<String, Integer>();
for (int i = 0; i < k; i++) {
String type = movielist.get(i).getMovietype();
if (typetimes.get(type) == null)
typetimes.put(type, 1);
else
typetimes.put(type, typetimes.get(type) + 1);
}
int max = 0;
String maxtype = "";
for (Entry<String, Integer> t : typetimes.entrySet()) {
if (max < t.getValue()) {
max = t.getValue();
maxtype = t.getKey();
} else if (max == t.getValue() && maxtype.indexOf(t.getKey()) < 0) {
maxtype += t.getKey();
}
}
System.out.println("该电影所属分类为: " + maxtype);
}
}
package org.algorithm.knn;
import java.util.Comparator;
/**
 * One labelled sample of the kNN demo: a two-element feature point, its genre
 * label, and the distance from this sample to the movie being classified.
 *
 * <p>An instance also serves as a {@link Comparator} that orders samples by
 * ascending distance; the comparator logic does not use the instance's own
 * fields, so an instance built with the no-argument constructor is sufficient.
 */
public class MoveMeta implements Comparator<MoveMeta> {
    /** Feature vector {fight scenes, kiss scenes}. */
    private int[] point;
    /** Genre label of this sample. */
    private String movietype;
    /** Distance from this sample to the unknown movie; set by the classifier. */
    private double betweentheunknow;

    /**
     * Creates a labelled sample.
     *
     * @param fighttimes number of fight scenes
     * @param kisstimes  number of kiss scenes
     * @param movietype  genre label
     */
    public MoveMeta(int fighttimes, int kisstimes, String movietype) {
        point = new int[] { fighttimes, kisstimes };
        this.movietype = movietype;
    }

    /**
     * Creates an empty instance (point and label are {@code null}, distance is 0),
     * typically used only as a comparator.
     */
    public MoveMeta() {
    }

    /** @return the feature vector (the internal array, not a copy) */
    public int[] getPoint() {
        return point;
    }

    /** @param point the feature vector to store (kept by reference) */
    public void setPoint(int[] point) {
        this.point = point;
    }

    /** @return the genre label */
    public String getMovietype() {
        return movietype;
    }

    /** @param movietype the genre label */
    public void setMovietype(String movietype) {
        this.movietype = movietype;
    }

    /** @return the stored distance to the unknown movie */
    public double getBetweentheunknow() {
        return betweentheunknow;
    }

    /** @param betweentheunknow the distance to the unknown movie */
    public void setBetweentheunknow(double betweentheunknow) {
        this.betweentheunknow = betweentheunknow;
    }

    /**
     * Orders two samples by ascending distance to the unknown movie.
     *
     * <p>Uses {@link Double#compare(double, double)} so the ordering is total:
     * a NaN distance sorts after every other value instead of comparing as
     * "equal" to everything, which would violate the comparator contract.
     *
     * @param arg0 first sample
     * @param arg1 second sample
     * @return negative, zero or positive as {@code arg0} is nearer than, as far
     *         as, or farther than {@code arg1}
     */
    @Override
    public int compare(MoveMeta arg0, MoveMeta arg1) {
        return Double.compare(arg0.getBetweentheunknow(), arg1.getBetweentheunknow());
    }
}
程序最终输出的分类结果为:该电影所属分类为: 爱情
以下为可供扩展阅读的文章:
http://www.cnblogs.com/zhangchaoyang/articles/2162393.html
http://wenku.baidu.com/link?url=3TajC59tsWvJ5oaO29hTgL3irs4wb2YW7NwpYLNN0xzc4zW0ih8oStZiVG5rpGRXBHbUIlg5Czpq6CmrM4CV42vGZcjiaDm5g4TojLwPNZW
http://blog.csdn.net/xiaowei_cqu/article/details/23782561