import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.List;
import com.doc88.bean.PaperHandled;
import com.doc88.dao.DbDao;
import com.doc88.dao.Pcode_Handled;
import com.doc88.weight.Weight;
/**
* 使用贝叶斯分类算法,分别采用伯努利模型和多项式模型进行计算,比较准确率 使用类LDA分类算法进行分类
*
* @author Ada
*
*/
// 贝叶斯分类方法
public class Classify {
// 首先实现贝叶斯伯努利模型分类
/**
 * Classifies every document returned by {@code Pcode_Handled.getPid()} with a
 * Bernoulli naive Bayes model over the three hard-coded categories and writes
 * one row per classified document into {@code result_classification}.
 *
 * <p>For each category the prior is {@code Nci / N}, where {@code Nci} comes
 * from {@code Weight.getCountByCat_id} and {@code N} is the hard-coded
 * training-set size. Each keyword of the document (table {@code feature_p})
 * multiplies the category score by {@code (doc_exist + 1) / (Nci + m + 3)},
 * with {@code doc_exist} read from {@code c_kwinfo} and {@code m} from
 * {@code getKwCountByCat_id}. The category with the highest score, as chosen
 * by {@code maxElement}, is stored together with the category returned by
 * {@code getCat_idByPid} and a 1/0 flag telling whether the two agree.
 *
 * <p>Documents without any keyword in {@code feature_p} are skipped. Any
 * {@link SQLException} is printed and ends the run; all JDBC resources are
 * closed in every case.
 */
public void computeCateProb() {
    String sel = "SELECT keyword FROM feature_p WHERE pid=?";
    String sl = "SELECT doc_exist FROM c_kwinfo WHERE keyword=? and cat_id=?";
    // NOTE(review): "cat-id" (hyphen) and "right" (reserved word in several
    // SQL dialects) look like invalid unquoted column names; the real schema
    // is not visible here, so the statement is left unchanged -- confirm.
    String ins = "insert into result_classification(pid,cat-id,cat_id,right) values (?,?,?,?)";
    int N = 9975; // total number of documents in the training set
    int[] category = { 275, 5959, 1679 };
    int catCount = category.length;
    double[] P = new double[catCount];     // prior probability per category
    double[] denom = new double[catCount]; // smoothing denominator per category
    double[] Pc = new double[catCount];    // running score per category

    try (Connection con = new DbDao().getConnection()) {
        if (null == con) {
            return;
        }
        List<PaperHandled> pidList = new Pcode_Handled().getPid();

        // The per-category counts do not depend on the document or keyword,
        // so they are queried once up front (assumed stable during the run).
        for (int j = 0; j < catCount; j++) {
            int cat_id = category[j];
            System.out.println(cat_id);
            int Nci = new Weight().getCountByCat_id(cat_id, con); // training docs in this category
            int m = getKwCountByCat_id(cat_id, con);
            // Floating-point division: integer division truncated this to 0.
            P[j] = (double) Nci / N;
            System.out.println(P[j]);
            denom[j] = (double) Nci + m + 3;
        }

        // Prepare each statement once and reuse it; try-with-resources
        // closes all of them even when a query fails.
        try (PreparedStatement pd = con.prepareStatement(sel);
                PreparedStatement pdt = con.prepareStatement(sl);
                PreparedStatement pt = con.prepareStatement(ins)) {
            for (PaperHandled paper : pidList) {
                String pid = paper.getPid();
                for (int j = 0; j < catCount; j++) {
                    Pc[j] = P[j];
                }

                boolean hasKeyword = false;
                pd.setString(1, pid);
                // The statement must stay open while its result set is read.
                try (ResultSet rs = pd.executeQuery()) {
                    while (rs.next()) {
                        hasKeyword = true;
                        String keyword = rs.getString("keyword");
                        for (int j = 0; j < catCount; j++) {
                            Pc[j] *= smoothedKeywordProb(pdt, keyword, category[j], denom[j]);
                        }
                    }
                }
                if (!hasKeyword) {
                    continue; // nothing to score, nothing to store
                }

                // NOTE(review): a product of many small factors can underflow
                // to 0 for keyword-rich documents; consider log-space scores
                // if maxElement also handles negative values.
                int index = maxElement(Pc);
                int predicted = category[index];
                System.out.println("文章" + pid + "所属的类别是" + predicted);

                int cat_id = getCat_idByPid(pid, con);
                pt.setString(1, pid);
                pt.setInt(2, predicted);
                pt.setInt(3, cat_id);
                pt.setInt(4, predicted == cat_id ? 1 : 0);
                pt.execute();
            }
        }
    } catch (SQLException e) {
        e.printStackTrace();
    }
}

/**
 * Returns the smoothed conditional probability of {@code keyword} for one
 * category: the product of {@code (doc_exist + 1) / denominator} over the
 * rows returned by {@code pdt}.
 *
 * @param pdt prepared {@code c_kwinfo} lookup taking (keyword, cat_id)
 * @param keyword keyword to look up
 * @param cat_id category to look up
 * @param denominator smoothing denominator for the category
 * @return the smoothed probability; {@code 1 / denominator} when the lookup
 *         returns no row, i.e. the keyword is treated as having count zero
 * @throws SQLException if the lookup fails
 */
private static double smoothedKeywordProb(PreparedStatement pdt, String keyword,
        int cat_id, double denominator) throws SQLException {
    pdt.setString(1, keyword);
    pdt.setInt(2, cat_id);
    double likelihood = 1;
    boolean found = false;
    try (ResultSet res = pdt.executeQuery()) {
        while (res.next()) {
            found = true;
            likelihood *= (res.getInt("doc_exist") + 1) / denominator;
        }
    }
    return found ? likelihood : 1 / denominator;
}
// Naive Bayes text classification, partial code (2)
// Latest recommended article published 2023-07-07 18:55:40