特征选择微服务主要实现以下特征选择算法:文档频率(Document Frequency,DF)、信息增益(Information Gain,IG)、卡方检验(Chi-Square Test,χ²,CHI)、互信息(Mutual Information,MI)、矩阵投影(Matrix Projection,MP)。
特征选择类图
特征选择类图如图所示:

部分实现代码
特征选择Action类
package com.robin.feature.action;
import com.robin.feature.corpus.CorpusManager;
import com.robin.feature.AbstractFeature;
import com.robin.feature.FeatureFactory;
import com.robin.feature.FeatureFactory.FeatureMethod;
import com.robin.loader.MircoServiceAction;
import com.robin.log.RobinLogger;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
/**
* <DT><B>描述:</B></DT>
* <DD>特征选择Action类</DD>
*
* 适配Jersey服务器资源调用
*
* @version Version1.0
* @author Robin
* @version <I> Date:2018-04-01</I>
* @author <I> E-mail:xsd-jj@163.com</I>
*/
public class FeatureSelectAction implements MircoServiceAction {
// Class-wide logger obtained from RobinLogger; the visible methods use it to report JSONException at SEVERE level.
private static final Logger LOGGER = RobinLogger.getLogger();
/**
 * Status codes reported by this action.
 */
public enum StatusCode {
    /** The check passed; no error to report. */
    OK,

    /** NOTE(review): presumably signals malformed JSON input; its use is not visible here, confirm. */
    JSON_ERR,

    /** The "kind" field is missing or holds an unsupported value. */
    KIND_ERR,

    /** The "version" field is missing or holds an unsupported value. */
    VERSION_ERR,

    /** The trainScale value is out of range (it is rejected when >= 1.0 or <= 0). */
    TRAIN_SCALE_ERR,

    /** NOTE(review): presumably an unsupported feature selection method; its use is not visible here, confirm. */
    METHOD_ERR,

    /** NOTE(review): presumably missing input texts; its use is not visible here, confirm. */
    TEXTS_NULL
}
/**
* Action状态内部类
*/
private class ActionStatus {
StatusCode statusCode;
String msg;
}
/**
 * Builds the JSON object that reports an error status.
 *
 * The status code name is stored under "status" and the message under "msg".
 * If the JSON library rejects a value, the failure is logged together with
 * its stack trace and the object built so far is returned.
 *
 * @param actionStatus status to report; its statusCode must not be null
 * @return JSONObject holding the entries that could be written
 */
private JSONObject getErrorJson(ActionStatus actionStatus) {
    JSONObject errJson = new JSONObject();
    try {
        errJson.put("status", actionStatus.statusCode.toString());
        errJson.put("msg", actionStatus.msg);
    } catch (JSONException ex) {
        // Hand the exception itself to the logger so the stack trace is kept.
        LOGGER.log(Level.SEVERE, ex.getMessage(), ex);
    }
    return errJson;
}
/**
 * Checks one entry of the input JSON object against a set of allowed values.
 *
 * The check fails with {@code errStatusCode} when {@code jsonObj.isNull(key)}
 * is true, when the value is not contained in {@code valueSet}, or when the
 * value cannot be read as a string.
 *
 * @param jsonObj input JSON object
 * @param key name of the entry to check
 * @param valueSet values accepted for the entry
 * @param errStatusCode status code to report when the check fails
 * @return ActionStatus with StatusCode.OK on success, otherwise
 *         {@code errStatusCode} and an explanatory message
 */
private ActionStatus checkJSONObjectTerm(JSONObject jsonObj,
        String key,
        HashSet<String> valueSet,
        StatusCode errStatusCode) {
    ActionStatus actionStatus = new ActionStatus();
    try {
        if (jsonObj.isNull(key)) {
            actionStatus.msg = "The input parameter is missing " + key + ".";
            actionStatus.statusCode = errStatusCode;
            return actionStatus;
        }
        String value = jsonObj.getString(key);
        if (!valueSet.contains(value)) {
            actionStatus.msg = "The value [" + value + "] of " + key + " is error.";
            actionStatus.statusCode = errStatusCode;
            return actionStatus;
        }
    } catch (JSONException ex) {
        // A value that cannot be read must not pass validation: report the
        // failure instead of falling through to OK.
        LOGGER.log(Level.SEVERE, ex.getMessage(), ex);
        actionStatus.msg = "The value of " + key + " cannot be read: " + ex.getMessage();
        actionStatus.statusCode = errStatusCode;
        return actionStatus;
    }
    actionStatus.statusCode = StatusCode.OK;
    return actionStatus;
}
/**
* 检查JSON输入对象
*
* @param jsonObj
* @return ActionStatus
*/
private ActionStatus checkInputJSONObject(JSONObject jsonObj) {
ActionStatus actionStatus = new ActionStatus();
ActionStatus retActionStatus;
HashSet<String> valueSet = new HashSet();
valueSet.add("feature");
retActionStatus = checkJSONObjectTerm(jsonObj, "kind", valueSet, StatusCode.KIND_ERR);
if (!retActionStatus.statusCode.equals(StatusCode.OK)) {
return retActionStatus;
}
valueSet.clear();
valueSet.add("v1");
retActionStatus = checkJSONObjectTerm(jsonObj, "version", valueSet, StatusCode.VERSION_ERR);
if (!retActionStatus.statusCode.equals(StatusCode.OK)) {
return retActionStatus;
}
try {
double trainScale = jsonObj.getJSONObject("metadata").getJSONObject("feature").getDouble("trainScale");
if ((trainScale >= 1.0) || (trainScale <= 0)) {
actionStatus.statusCode = StatusCode.TRAIN_SCALE_ERR;
actionStatus.msg = "The input train_scale [" + trai