use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class MekaDataWriter method writeGenericFormat.
/**
 * Serializes the given instances to JSON and writes them, followed by a newline, to the
 * generic-format writer.
 *
 * <p>The writer is prepared by {@code initGeneric()}. It is closed and the field reset to
 * {@code null} once this method finishes, whether or not the write succeeded, so a failed
 * write does not leak the underlying handle.
 *
 * @param instances the instances to serialize
 * @throws AnalysisEngineProcessException if initialization, serialization, writing or closing
 *         fails; the original exception is attached as the cause
 */
@Override
public void writeGenericFormat(Collection<Instance> instances) throws AnalysisEngineProcessException {
    try {
        try {
            initGeneric();
            bw.write(gson.toJson(instances.toArray(new Instance[0])) + "\n");
        } finally {
            // Close on the failure path as well; previously an exception during
            // serialization or writing skipped close() and left the writer open.
            if (bw != null) {
                try {
                    bw.close();
                } finally {
                    bw = null;
                }
            }
        }
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
}
use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class WekaUtilTest method instanceToArffTest.
/**
 * Builds three instances that share the same four features, passes them to
 * {@code WekaUtils.instanceListToArffFile} and prints the content of the resulting file.
 *
 * <p>One feature name contains curly braces ({@code feature3_{{}).
 */
@Test
public void instanceToArffTest() throws Exception {
    Instance i1 = new Instance();
    i1.addFeature(new Feature("feature1", 2, FeatureType.NUMERIC));
    i1.addFeature(new Feature("feature2", 2, FeatureType.NUMERIC));
    i1.addFeature(new Feature("feature3_{{", "a", FeatureType.STRING));
    i1.addFeature(new Feature("feature4", Values.VALUE_1, FeatureType.NUMERIC));
    i1.setOutcomes("1");

    Instance i2 = new Instance();
    i2.addFeature(new Feature("feature1", 1, FeatureType.NUMERIC));
    i2.addFeature(new Feature("feature2", 1, FeatureType.NUMERIC));
    i2.addFeature(new Feature("feature3_{{", "b", FeatureType.STRING));
    i2.addFeature(new Feature("feature4", Values.VALUE_2, FeatureType.NUMERIC));
    i2.setOutcomes("2");

    Instance i3 = new Instance();
    i3.addFeature(new Feature("feature1", 1, FeatureType.NUMERIC));
    i3.addFeature(new Feature("feature2", 1, FeatureType.NUMERIC));
    i3.addFeature(new Feature("feature3_{{", "b", FeatureType.STRING));
    i3.addFeature(new Feature("feature4", Values.VALUE_3, FeatureType.NUMERIC));
    i3.setOutcomes("2");

    List<Instance> iList = new ArrayList<>();
    iList.add(i1);
    iList.add(i2);
    iList.add(i3);

    File outfile = new File("target/test/out.txt");
    // Create only the parent directory. Calling mkdirs() on the file path itself would
    // create a directory named "out.txt" and prevent the file from being created.
    File parentDir = outfile.getParentFile();
    if (parentDir != null) {
        parentDir.mkdirs();
    }
    outfile.createNewFile();
    outfile.deleteOnExit();

    WekaUtils.instanceListToArffFile(outfile, iList);
    System.out.println(FileUtils.readFileToString(outfile, "utf-8"));
}
use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class WekaUtilTest method tcInstanceToWekaInstanceTest.
/**
 * Converts two TC instances with differing feature sets against one shared attribute header
 * and checks that both resulting Weka instances have equal headers and five attributes.
 *
 * <p>Both converted instances are then added to their dataset, which is printed.
 */
@Test
public void tcInstanceToWekaInstanceTest() throws Exception {
    List<String> outcomeValues = Arrays.asList("outc_1", "outc_2", "outc_3");

    // Header: four feature attributes plus the outcome attribute.
    ArrayList<Attribute> header = new ArrayList<>();
    header.add(new Attribute("feature5"));
    header.add(new Attribute("feature2"));
    header.add(new Attribute("feature4", Arrays.asList("val_1", "val_2")));
    header.add(new Attribute("feature1"));
    header.add(new Attribute("outcome", outcomeValues));
    Instances trainingData = new Instances("test", header, 0);

    Instance first = new Instance();
    first.addFeature(new Feature("feature1", 2, FeatureType.NUMERIC));
    first.addFeature(new Feature("feature2", 2, FeatureType.NUMERIC));
    first.addFeature(new Feature("feature3_{{", "a", FeatureType.STRING));

    Instance second = new Instance();
    second.addFeature(new Feature("feature1", 1, FeatureType.NUMERIC));
    second.addFeature(new Feature("feature4", "val_1", FeatureType.STRING));
    second.addFeature(new Feature("feature3_{{", "b", FeatureType.STRING));

    weka.core.Instance convertedFirst =
            WekaUtils.tcInstanceToWekaInstance(first, trainingData, outcomeValues, false);
    weka.core.Instance convertedSecond =
            WekaUtils.tcInstanceToWekaInstance(second, trainingData, outcomeValues, false);

    boolean sameHeaders = convertedFirst.equalHeaders(convertedSecond);
    assertEquals(true, sameHeaders);
    assertEquals(5, convertedFirst.numAttributes());

    convertedFirst.dataset().add(convertedFirst);
    convertedSecond.dataset().add(convertedSecond);
    System.out.println(convertedFirst.dataset() + "\n");
    System.out.println(convertedSecond.dataset() + "\n");
}
use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class WekaUtilTest method tcInstanceToWekaInstanceFailTest.
/**
 * Expects {@code WekaUtils.tcInstanceToWekaInstance} to throw an
 * {@link IllegalArgumentException} for an instance whose {@code feature4} value
 * ({@code val_1}) is not among the values declared for that attribute in the header
 * ({@code val_4}, {@code val_2}).
 */
@Test(expected = IllegalArgumentException.class)
public void tcInstanceToWekaInstanceFailTest() throws Exception {
    List<String> outcomeValues = Arrays.asList("outc_1", "outc_2", "outc_3");

    Instance candidate = new Instance();
    candidate.addFeature(new Feature("feature1", 2, FeatureType.NUMERIC));
    candidate.addFeature(new Feature("feature4", "val_1", FeatureType.STRING));
    candidate.addFeature(new Feature("feature3_{{", "a", FeatureType.STRING));

    ArrayList<Attribute> header = new ArrayList<>();
    header.add(new Attribute("feature2"));
    header.add(new Attribute("feature4", Arrays.asList("val_4", "val_2")));
    header.add(new Attribute("outcome", outcomeValues));
    Instances trainingData = new Instances("test", header, 0);

    // The return value is irrelevant; the call itself is expected to throw.
    WekaUtils.tcInstanceToWekaInstance(candidate, trainingData, outcomeValues, false);
}
use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class WekaLoadModelConnector method process.
/**
 * Classifies the first instance extracted from the given CAS with the loaded classifier and
 * records the prediction as {@code TextClassificationOutcome} annotation(s).
 *
 * <ul>
 * <li>Single-label and regression: the prediction is written into the outcome annotation
 * returned by {@code getOutcome(jcas)}.</li>
 * <li>Multi-label: every label whose predicted value reaches {@code bipartitionThreshold} is
 * collected, the existing outcome annotation(s) are removed from the indexes, and one new
 * outcome annotation is added per collected label.</li>
 * </ul>
 *
 * @param jcas the CAS to classify and annotate
 * @throws AnalysisEngineProcessException if instance extraction, instance conversion or
 *         classification fails
 * @throws RuntimeException if adding the multi-label outcome annotations fails
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    Instance instance;
    try {
        InstanceExtractor extractor = new InstanceExtractor(featureMode, featureExtractors, false);
        List<Instance> instances = extractor.getInstances(jcas, useSparse);
        // Only the first extracted instance is classified.
        instance = instances.get(0);
    } catch (Exception e1) {
        throw new AnalysisEngineProcessException(e1);
    }

    boolean isMultiLabel = learningMode.equals(Constants.LM_MULTI_LABEL);
    boolean isRegression = learningMode.equals(Constants.LM_REGRESSION);

    if (!isMultiLabel) {
        // single-label
        Object val;
        try {
            weka.core.Instance wekaInstance =
                    WekaUtils.tcInstanceToWekaInstance(instance, trainingData, classLabels, isRegression);
            if (!isRegression) {
                // The classifier returns the index of the predicted class label.
                val = classLabels.get((int) cls.classifyInstance(wekaInstance));
            } else {
                val = cls.classifyInstance(wekaInstance);
            }
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }
        TextClassificationOutcome outcome = getOutcome(jcas);
        outcome.setOutcome(val.toString());
    } else {
        // multi-label
        weka.core.Instance mekaInstance;
        double[] vals;
        try {
            mekaInstance = WekaUtils.tcInstanceToMekaInstance(instance, trainingData, classLabels);
            vals = cls.distributionForInstance(mekaInstance);
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }

        // Evaluate the threshold once instead of on every loop iteration.
        double threshold = Double.valueOf(bipartitionThreshold);
        List<String> outcomes = new ArrayList<String>();
        for (int i = 0; i < vals.length; i++) {
            if (vals[i] >= threshold) {
                // NOTE(review): split() treats the prefix as a regex and keeps only the text up
                // to the next occurrence of the prefix - confirm labels never contain it.
                String label = mekaInstance.attribute(i).name().split(WekaDataWriter.CLASS_ATTRIBUTE_PREFIX)[1];
                outcomes.add(label);
            }
        }

        // Remove the existing outcome annotation(s) before adding the predicted ones.
        if (FM_DOCUMENT.equals(featureMode) || FM_PAIR.equals(featureMode)) {
            // Copy first so that removing annotations does not disturb the iteration.
            List<TextClassificationOutcome> oldOutcomes =
                    new ArrayList<TextClassificationOutcome>(JCasUtil.select(jcas, TextClassificationOutcome.class));
            for (TextClassificationOutcome oldOutcome : oldOutcomes) {
                oldOutcome.removeFromIndexes();
            }
        } else {
            TextClassificationOutcome annotation = getOutcome(jcas);
            annotation.removeFromIndexes();
        }

        // Add one outcome annotation per predicted label.
        try {
            for (String label : outcomes) {
                TextClassificationOutcome newOutcome = new TextClassificationOutcome(jcas);
                newOutcome.setOutcome(label);
                newOutcome.addToIndexes();
            }
        } catch (Exception ex) {
            String msg = "Error while adding predicted outcomes to the CAS. Details: " + ex.getMessage();
            Logger.getLogger(getClass()).error(msg, ex);
            throw new RuntimeException(msg, ex);
        }
    }
}
Aggregations