use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class WekaDataWriter method writeGenericFormat.
/**
 * Serializes the given instances as a single JSON array line and writes it through the
 * writer held in {@code bw}.
 *
 * <p>The writer is closed and the {@code bw} field reset after every call, including calls
 * where serialization or writing fails, so a failed write does not leak the open writer.
 *
 * @param instances the instances to serialize; converted to an array before serialization
 * @throws AnalysisEngineProcessException wrapping any exception raised while initializing,
 *         writing, or closing the writer
 */
@Override
public void writeGenericFormat(Collection<Instance> instances) throws AnalysisEngineProcessException {
    try {
        initGeneric();
        try {
            bw.write(gson.toJson(instances.toArray(new Instance[0])) + "\n");
        } finally {
            // Release the writer even when the write above throws; the field is cleared
            // only after a successful close, matching the original success-path order.
            if (bw != null) {
                bw.close();
                bw = null;
            }
        }
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
}
use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class WekaFeatureEncoder method getAttributeStore.
/**
 * Collects one attribute per distinct feature name found in the given instances.
 *
 * <p>Features are visited in iteration order; the first feature seen under a name is the one
 * converted via {@code featureToAttribute} and stored, later features with the same name are
 * skipped.
 *
 * @param instances the instances whose features are scanned
 * @return a store containing an attribute for every feature name encountered
 * @throws TextClassificationException declared by this method; presumably raised by the
 *         feature-to-attribute conversion (confirm against {@code featureToAttribute})
 */
public static AttributeStore getAttributeStore(Collection<Instance> instances) throws TextClassificationException {
    AttributeStore store = new AttributeStore();
    for (Instance current : instances) {
        for (Feature candidate : current.getFeatures()) {
            // Name already registered: nothing to convert for this feature.
            if (store.containsAttributeName(candidate.getName())) {
                continue;
            }
            Attribute converted = featureToAttribute(candidate);
            store.addAttribute(candidate.getName(), converted);
        }
    }
    return store;
}
use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class WekaUtilTest method tcInstanceToMekaInstanceTest.
/**
 * Converts two TC instances to MEKA instances against a seven-attribute header (three outcome
 * attributes followed by four feature attributes) and checks that both results share the same
 * header and expose seven attributes. The resulting datasets are printed for inspection.
 */
@Test
public void tcInstanceToMekaInstanceTest() throws Exception {
    List<String> outcomeValues = Arrays.asList("outc_1", "outc_2", "outc_3");

    // Header of the target dataset: one nominal 0/1 attribute per outcome, then the features.
    ArrayList<Attribute> header = new ArrayList<>();
    header.add(new Attribute("outc_1", Arrays.asList("0", "1")));
    header.add(new Attribute("outc_2", Arrays.asList("0", "1")));
    header.add(new Attribute("outc_3", Arrays.asList("0", "1")));
    header.add(new Attribute("feature5"));
    header.add(new Attribute("feature2"));
    header.add(new Attribute("feature4", Arrays.asList("val_1", "val_2")));
    header.add(new Attribute("feature1"));
    Instances trainingData = new Instances("test", header, 0);

    Instance first = new Instance();
    first.addFeature(new Feature("feature1", 2, FeatureType.NUMERIC));
    first.addFeature(new Feature("feature2", 2, FeatureType.NUMERIC));
    first.addFeature(new Feature("feature3_{{", "a", FeatureType.STRING));

    Instance second = new Instance();
    second.addFeature(new Feature("feature1", 1, FeatureType.NUMERIC));
    second.addFeature(new Feature("feature4", "val_1", FeatureType.STRING));
    second.addFeature(new Feature("feature3_{{", "b", FeatureType.STRING));

    weka.core.Instance convertedFirst = WekaUtils.tcInstanceToMekaInstance(first, trainingData, outcomeValues);
    weka.core.Instance convertedSecond = WekaUtils.tcInstanceToMekaInstance(second, trainingData, outcomeValues);

    assertEquals(true, convertedFirst.equalHeaders(convertedSecond));
    assertEquals(7, convertedFirst.numAttributes());

    // Attach each converted instance to its own dataset so the printout includes it.
    convertedFirst.dataset().add(convertedFirst);
    convertedSecond.dataset().add(convertedSecond);
    System.out.println(convertedFirst.dataset() + "\n");
    System.out.println(convertedSecond.dataset() + "\n");
}
use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class WekaUtilTest method tcInstanceToWekaInstanceRegressionTest.
/**
 * Converts two TC instances to Weka instances in regression mode (no outcome value list, the
 * final flag set to {@code true}) against a five-attribute header and checks that both results
 * share the same header and expose five attributes. The resulting datasets are printed.
 */
@Test
public void tcInstanceToWekaInstanceRegressionTest() throws Exception {
    // Header of the target dataset: four feature attributes followed by the outcome attribute.
    ArrayList<Attribute> header = new ArrayList<>();
    header.add(new Attribute("feature5"));
    header.add(new Attribute("feature2"));
    header.add(new Attribute("feature4", Arrays.asList("val_1", "val_2")));
    header.add(new Attribute("feature1"));
    header.add(new Attribute("outcome"));
    Instances trainingData = new Instances("test", header, 0);

    Instance first = new Instance();
    first.addFeature(new Feature("feature1", 2, FeatureType.NUMERIC));
    first.addFeature(new Feature("feature2", 2, FeatureType.NUMERIC));
    first.addFeature(new Feature("feature3_{{", "a", FeatureType.STRING));

    Instance second = new Instance();
    second.addFeature(new Feature("feature1", 1, FeatureType.NUMERIC));
    second.addFeature(new Feature("feature4", "val_1", FeatureType.STRING));
    second.addFeature(new Feature("feature3_{{", "b", FeatureType.STRING));

    weka.core.Instance convertedFirst = WekaUtils.tcInstanceToWekaInstance(first, trainingData, null, true);
    weka.core.Instance convertedSecond = WekaUtils.tcInstanceToWekaInstance(second, trainingData, null, true);

    assertEquals(true, convertedFirst.equalHeaders(convertedSecond));
    assertEquals(5, convertedFirst.numAttributes());

    // Attach each converted instance to its own dataset so the printout includes it.
    convertedFirst.dataset().add(convertedFirst);
    convertedSecond.dataset().add(convertedSecond);
    System.out.println(convertedFirst.dataset() + "\n");
    System.out.println(convertedSecond.dataset() + "\n");
}
use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class WekaUtilTest method instanceToArffTest_multiLabel.
/**
 * Writes three multi-label TC instances to an ARFF file via
 * {@code WekaUtils.instanceListToArffFileMultiLabel} and prints the file content.
 *
 * <p>The output file lives under {@code target/test}; only that parent directory is created
 * up front, so the output path itself stays a regular file rather than a directory.
 */
@Test
public void instanceToArffTest_multiLabel() throws Exception {
    Instance i1 = new Instance();
    i1.addFeature(new Feature("feature1", 2, FeatureType.NUMERIC));
    i1.addFeature(new Feature("feature2", 2, FeatureType.NUMERIC));
    i1.addFeature(new Feature("feature3_{{", "a", FeatureType.STRING));
    i1.addFeature(new Feature("feature4", Values.VALUE_1, FeatureType.NUMERIC));
    i1.setOutcomes("1", "2");

    Instance i2 = new Instance();
    i2.addFeature(new Feature("feature1", 1, FeatureType.NUMERIC));
    i2.addFeature(new Feature("feature2", 1, FeatureType.NUMERIC));
    i2.addFeature(new Feature("feature3_{{", "b", FeatureType.STRING));
    i2.addFeature(new Feature("feature4", Values.VALUE_2, FeatureType.NUMERIC));
    i2.setOutcomes("2", "3");

    Instance i3 = new Instance();
    i3.addFeature(new Feature("feature1", 1, FeatureType.NUMERIC));
    i3.addFeature(new Feature("feature2", 1, FeatureType.NUMERIC));
    i3.addFeature(new Feature("feature3_{{", "b", FeatureType.STRING));
    i3.addFeature(new Feature("feature4", Values.VALUE_3, FeatureType.NUMERIC));
    i3.setOutcomes("2");

    List<Instance> instances = new ArrayList<>();
    instances.add(i1);
    instances.add(i2);
    instances.add(i3);

    File outfile = new File("target/test/out.txt");
    // Create the containing directory only: calling mkdirs() on the file path itself would
    // turn "out.txt" into a directory and createNewFile() could then never create the file.
    outfile.getParentFile().mkdirs();
    outfile.createNewFile();
    outfile.deleteOnExit();

    WekaUtils.instanceListToArffFileMultiLabel(outfile, instances, false);
    System.out.println(FileUtils.readFileToString(outfile, "utf-8"));
}
Aggregations