下面的代码产生一个实例对象,作为ARFF文件输出到标准流。
产生如下类型的属性:
- numeric
- nominal
- string
- date
- relational
AttTest.java
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
/**
* Generates a little ARFF file with different attribute types.
*
* @author FracPete
*/
public class AttTest {
public static void main(String[] args) throws Exception {
FastVector atts;
FastVector attsRel;
FastVector attVals;
FastVector attValsRel;
Instances data;
Instances dataRel;
double[] vals;
double[] valsRel;
int i;
// 1. set up attributes
atts = new FastVector();
// - numeric
atts.addElement(new Attribute("att1"));
// - nominal
attVals = new FastVector();
for (i = 0; i < 5; i++)
attVals.addElement("val" + (i+1));
atts.addElement(new Attribute("att2", attVals));
// - string
atts.addElement(new Attribute("att3", (FastVector) null));
// - date
atts.addElement(new Attribute("att4", "yyyy-MM-dd"));
// - relational
attsRel = new FastVector();
// -- numeric
attsRel.addElement(new Attribute("att5.1"));
// -- nominal
attValsRel = new FastVector();
for (i = 0; i < 5; i++)
attValsRel.addElement("val5." + (i+1));
attsRel.addElement(new Attribute("att5.2", attValsRel));
dataRel = new Instances("att5", attsRel, 0);
atts.addElement(new Attribute("att5", dataRel, 0));
// 2. create Instances object
data = new Instances("MyRelation", atts, 0);
// 3. fill with data
// first instance
vals = new double[data.numAttributes()];
// - numeric
vals[0] = Math.PI;
// - nominal
vals[1] = attVals.indexOf("val3");
// - string
vals[2] = data.attribute(2).addStringValue("This is a string!");
// - date
vals[3] = data.attribute(3).parseDate("2001-11-09");
// - relational
dataRel = new Instances(data.attribute(4).relation(), 0);
// -- first instance
valsRel = new double[2];
valsRel[0] = Math.PI + 1;
valsRel[1] = attValsRel.indexOf("val5.3");
dataRel.add(new Instance(1.0, valsRel));
// -- second instance
valsRel = new double[2];
valsRel[0] = Math.PI + 2;
valsRel[1] = attValsRel.indexOf("val5.2");
dataRel.add(new Instance(1.0, valsRel));
vals[4] = data.attribute(4).addRelation(dataRel);
// add
data.add(new Instance(1.0, vals));
// second instance
vals = new double[data.numAttributes()]; // important: needs NEW array!
// - numeric
vals[0] = Math.E;
// - nominal
vals[1] = attVals.indexOf("val1");
// - string
vals[2] = data.attribute(2).addStringValue("And another one!");
// - date
vals[3] = data.attribute(3).parseDate("2000-12-01");
// - relational
dataRel = new Instances(data.attribute(4).relation(), 0);
// -- first instance
valsRel = new double[2];
valsRel[0] = Math.E + 1;
valsRel[1] = attValsRel.indexOf("val5.4");
dataRel.add(new Instance(1.0, valsRel));
// -- second instance
valsRel = new double[2];
valsRel[0] = Math.E + 2;
valsRel[1] = attValsRel.indexOf("val5.1");
dataRel.add(new Instance(1.0, valsRel));
vals[4] = data.attribute(4).addRelation(dataRel);
// add
data.add(new Instance(1.0, vals));
// 4. output data
System.out.println(data);
}
}对于丢失的数据
默认新的数组被初始化为0。如果你想在某个位置设置缺失值,通过weka.core.Instance类中的missingValue()方法来明确的设置。在3.7.1版本后的weka中的Instance 是一个接口,所以missingValue()被移到了weka.core.Utils中。如果你已经有一个存在的weka.core.Instance对象,那么你可以使用setMissing(int)方法在给定位置设置缺失值。下面给出在第三个属性设置缺失值的例子。
- double array:
double[] vals = ... // from somewhere, e.g., from AttTest.java example
vals[2] = Instance.missingValue(); // or ... = Utils.missingValue() for Weka > 3.7.1- weka.core.Instance object:
double[] vals = ... // from somewhere, e.g., from AttTest.java example
Instance inst = new Instance(1.0, vals);
inst.setMissing(2);
本文介绍了一个使用Weka库创建包含多种属性类型(数值、名义、字符串、日期和关系型)的ARFF文件的Java示例。示例代码演示了如何设置不同类型的属性,并填充数据。
1377

被折叠的 条评论
为什么被折叠?



