Search in sources :

Example 26 with Instance

use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.

the class MekaDataWriter method writeGenericFormat.

/**
 * Serializes the given instances as a single JSON array, writes it followed by a newline to
 * the writer held in {@code bw}, and then closes and resets that writer.
 *
 * <p>{@code initGeneric()} is invoked first; presumably it is what opens {@code bw}
 * (NOTE(review): confirm against its definition).
 *
 * @param instances
 *            the instances to serialize
 * @throws AnalysisEngineProcessException
 *             wrapping any exception raised while initializing, serializing, writing or
 *             closing
 */
@Override
public void writeGenericFormat(Collection<Instance> instances) throws AnalysisEngineProcessException {
    try {
        initGeneric();
        try {
            bw.write(gson.toJson(instances.toArray(new Instance[0])) + "\n");
        } finally {
            // Release the writer on the failure path as well, so a serialization or write
            // error does not leave the underlying stream open.
            if (bw != null) {
                try {
                    bw.close();
                } finally {
                    bw = null;
                }
            }
        }
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
}
Also used : DenseInstance(weka.core.DenseInstance) SparseInstance(weka.core.SparseInstance) Instance(org.dkpro.tc.api.features.Instance) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException) IOException(java.io.IOException) TextClassificationException(org.dkpro.tc.api.exception.TextClassificationException) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException)

Example 27 with Instance

use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.

the class WekaUtilTest method instanceToArffTest.

/**
 * Builds three instances with numeric and string features, writes them to
 * {@code target/test/out.txt} via {@code WekaUtils.instanceListToArffFile} and prints the
 * resulting file content. The test passes as long as no exception is thrown.
 */
@Test
public void instanceToArffTest() throws Exception {
    Instance i1 = new Instance();
    i1.addFeature(new Feature("feature1", 2, FeatureType.NUMERIC));
    i1.addFeature(new Feature("feature2", 2, FeatureType.NUMERIC));
    i1.addFeature(new Feature("feature3_{{", "a", FeatureType.STRING));
    i1.addFeature(new Feature("feature4", Values.VALUE_1, FeatureType.NUMERIC));
    i1.setOutcomes("1");
    Instance i2 = new Instance();
    i2.addFeature(new Feature("feature1", 1, FeatureType.NUMERIC));
    i2.addFeature(new Feature("feature2", 1, FeatureType.NUMERIC));
    i2.addFeature(new Feature("feature3_{{", "b", FeatureType.STRING));
    i2.addFeature(new Feature("feature4", Values.VALUE_2, FeatureType.NUMERIC));
    i2.setOutcomes("2");
    Instance i3 = new Instance();
    i3.addFeature(new Feature("feature1", 1, FeatureType.NUMERIC));
    i3.addFeature(new Feature("feature2", 1, FeatureType.NUMERIC));
    i3.addFeature(new Feature("feature3_{{", "b", FeatureType.STRING));
    i3.addFeature(new Feature("feature4", Values.VALUE_3, FeatureType.NUMERIC));
    i3.setOutcomes("2");
    List<Instance> iList = new ArrayList<>();
    iList.add(i1);
    iList.add(i2);
    iList.add(i3);
    File outfile = new File("target/test/out.txt");
    // Create only the parent directory. Calling mkdirs() on the file path itself would
    // create a directory named "out.txt" and createNewFile() could then not create the file.
    outfile.getParentFile().mkdirs();
    outfile.createNewFile();
    outfile.deleteOnExit();
    WekaUtils.instanceListToArffFile(outfile, iList);
    System.out.println(FileUtils.readFileToString(outfile, "utf-8"));
}
Also used : Instance(org.dkpro.tc.api.features.Instance) ArrayList(java.util.ArrayList) Feature(org.dkpro.tc.api.features.Feature) File(java.io.File) Test(org.junit.Test)

Example 28 with Instance

use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.

the class WekaUtilTest method tcInstanceToWekaInstanceTest.

/**
 * Converts two TC instances with differing feature sets against the same Weka header and
 * checks that both resulting Weka instances share that header and expose five attributes.
 * Both datasets are printed afterwards.
 */
@Test
public void tcInstanceToWekaInstanceTest() throws Exception {
    List<String> outcomeValues = Arrays.asList("outc_1", "outc_2", "outc_3");

    // Header the instances are mapped onto: four feature attributes plus the outcome.
    ArrayList<Attribute> headerAttributes = new ArrayList<>();
    headerAttributes.add(new Attribute("feature5"));
    headerAttributes.add(new Attribute("feature2"));
    headerAttributes.add(new Attribute("feature4", Arrays.asList("val_1", "val_2")));
    headerAttributes.add(new Attribute("feature1"));
    headerAttributes.add(new Attribute("outcome", outcomeValues));
    Instances header = new Instances("test", headerAttributes, 0);

    Instance first = new Instance();
    first.addFeature(new Feature("feature1", 2, FeatureType.NUMERIC));
    first.addFeature(new Feature("feature2", 2, FeatureType.NUMERIC));
    first.addFeature(new Feature("feature3_{{", "a", FeatureType.STRING));

    Instance second = new Instance();
    second.addFeature(new Feature("feature1", 1, FeatureType.NUMERIC));
    second.addFeature(new Feature("feature4", "val_1", FeatureType.STRING));
    second.addFeature(new Feature("feature3_{{", "b", FeatureType.STRING));

    weka.core.Instance convertedFirst = WekaUtils.tcInstanceToWekaInstance(first, header, outcomeValues, false);
    weka.core.Instance convertedSecond = WekaUtils.tcInstanceToWekaInstance(second, header, outcomeValues, false);

    assertEquals(true, convertedFirst.equalHeaders(convertedSecond));
    assertEquals(5, convertedFirst.numAttributes());

    convertedFirst.dataset().add(convertedFirst);
    convertedSecond.dataset().add(convertedSecond);
    System.out.println(convertedFirst.dataset() + "\n");
    System.out.println(convertedSecond.dataset() + "\n");
}
Also used : Instances(weka.core.Instances) Instance(org.dkpro.tc.api.features.Instance) Attribute(weka.core.Attribute) ArrayList(java.util.ArrayList) Feature(org.dkpro.tc.api.features.Feature) Test(org.junit.Test)

Example 29 with Instance

use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.

the class WekaUtilTest method tcInstanceToWekaInstanceFailTest.

/**
 * Expects {@code WekaUtils.tcInstanceToWekaInstance} to raise an
 * {@link IllegalArgumentException} for an instance whose "feature4" value ("val_1") is not
 * among the values declared for that attribute in the header ("val_4", "val_2").
 */
@Test(expected = IllegalArgumentException.class)
public void tcInstanceToWekaInstanceFailTest() throws Exception {
    List<String> outcomeValues = Arrays.asList("outc_1", "outc_2", "outc_3");

    Instance candidate = new Instance();
    candidate.addFeature(new Feature("feature1", 2, FeatureType.NUMERIC));
    candidate.addFeature(new Feature("feature4", "val_1", FeatureType.STRING));
    candidate.addFeature(new Feature("feature3_{{", "a", FeatureType.STRING));

    ArrayList<Attribute> headerAttributes = new ArrayList<>();
    headerAttributes.add(new Attribute("feature2"));
    headerAttributes.add(new Attribute("feature4", Arrays.asList("val_4", "val_2")));
    headerAttributes.add(new Attribute("outcome", outcomeValues));
    Instances header = new Instances("test", headerAttributes, 0);

    // Only the thrown exception matters here, so the conversion result is discarded.
    WekaUtils.tcInstanceToWekaInstance(candidate, header, outcomeValues, false);
}
Also used : Instances(weka.core.Instances) Instance(org.dkpro.tc.api.features.Instance) Attribute(weka.core.Attribute) ArrayList(java.util.ArrayList) Feature(org.dkpro.tc.api.features.Feature) Test(org.junit.Test)

Example 30 with Instance

use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.

the class WekaLoadModelConnector method process.

/**
 * Extracts the first instance from the CAS, classifies it with the loaded model and stores
 * the prediction in {@link TextClassificationOutcome} annotations.
 *
 * <p>Unless the learning mode is multi-label, the prediction (a class label, or the raw
 * classifier value for regression) is set on the outcome returned by
 * {@code getOutcome(jcas)}. For multi-label, the existing outcome annotation(s) are removed
 * from the indexes and one new annotation is added for every label whose score reaches
 * {@code bipartitionThreshold}.
 *
 * @param jcas
 *            the CAS to classify
 * @throws AnalysisEngineProcessException
 *             wrapping any exception raised during instance extraction, conversion to a
 *             Weka/Meka instance, or classification
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    Instance tcInstance;
    try {
        InstanceExtractor instanceExtractor = new InstanceExtractor(featureMode, featureExtractors, false);
        List<Instance> extracted = instanceExtractor.getInstances(jcas, useSparse);
        tcInstance = extracted.get(0);
    } catch (Exception e1) {
        throw new AnalysisEngineProcessException(e1);
    }

    boolean multiLabel = learningMode.equals(Constants.LM_MULTI_LABEL);
    boolean regression = learningMode.equals(Constants.LM_REGRESSION);

    if (!multiLabel) {
        // single-label (classification or regression)
        Object prediction;
        try {
            weka.core.Instance wekaInstance = WekaUtils.tcInstanceToWekaInstance(tcInstance, trainingData, classLabels, regression);
            if (regression) {
                prediction = cls.classifyInstance(wekaInstance);
            } else {
                // the classifier result is used as an index into the class label list
                prediction = classLabels.get((int) cls.classifyInstance(wekaInstance));
            }
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }
        getOutcome(jcas).setOutcome(prediction.toString());
        return;
    }

    // multi-label
    weka.core.Instance mekaInstance;
    double[] scores;
    try {
        mekaInstance = WekaUtils.tcInstanceToMekaInstance(tcInstance, trainingData, classLabels);
        scores = cls.distributionForInstance(mekaInstance);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }

    // Keep every label whose score reaches the threshold; the label is the part of the
    // attribute name that follows the class attribute prefix.
    List<String> predictedLabels = new ArrayList<String>();
    for (int idx = 0; idx < scores.length; idx++) {
        if (!(scores[idx] >= Double.valueOf(bipartitionThreshold))) {
            continue;
        }
        String attributeName = mekaInstance.attribute(idx).name();
        predictedLabels.add(attributeName.split(WekaDataWriter.CLASS_ATTRIBUTE_PREFIX)[1]);
    }

    // Remove the previous outcome annotation(s) before adding the predicted ones.
    if (FM_DOCUMENT.equals(featureMode) || FM_PAIR.equals(featureMode)) {
        // Snapshot first so the index is not modified while it is being iterated.
        List<Annotation> staleOutcomes = new ArrayList<Annotation>(JCasUtil.select(jcas, TextClassificationOutcome.class));
        for (Annotation stale : staleOutcomes) {
            stale.removeFromIndexes();
        }
    } else {
        getOutcome(jcas).removeFromIndexes();
    }

    if (!predictedLabels.isEmpty()) {
        TextClassificationOutcome firstOutcome = new TextClassificationOutcome(jcas);
        firstOutcome.setOutcome(predictedLabels.get(0));
        firstOutcome.addToIndexes();
    }
    if (predictedLabels.size() > 1) {
        // add more outcome annotations
        try {
            for (String label : predictedLabels.subList(1, predictedLabels.size())) {
                TextClassificationOutcome additionalOutcome = new TextClassificationOutcome(jcas);
                additionalOutcome.setOutcome(label);
                additionalOutcome.addToIndexes();
            }
        } catch (Exception ex) {
            String msg = "Error while trying to retrieve TC focus from CAS. Details: " + ex.getMessage();
            Logger.getLogger(getClass()).error(msg, ex);
            throw new RuntimeException(msg, ex);
        }
    }
}
Also used : Instance(org.dkpro.tc.api.features.Instance) ArrayList(java.util.ArrayList) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException) ResourceInitializationException(org.apache.uima.resource.ResourceInitializationException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException) Annotation(org.apache.uima.jcas.tcas.Annotation) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) InstanceExtractor(org.dkpro.tc.core.task.uima.InstanceExtractor)

Aggregations

Instance (org.dkpro.tc.api.features.Instance)61 ArrayList (java.util.ArrayList)38 Feature (org.dkpro.tc.api.features.Feature)30 Test (org.junit.Test)27 File (java.io.File)17 AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException)14 IOException (java.io.IOException)12 Gson (com.google.gson.Gson)8 TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException)8 Attribute (weka.core.Attribute)8 DenseInstance (weka.core.DenseInstance)8 Instances (weka.core.Instances)8 SparseInstance (weka.core.SparseInstance)8 FeatureExtractorResource_ImplBase (org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase)6 BufferedReader (java.io.BufferedReader)5 FileInputStream (java.io.FileInputStream)5 InputStreamReader (java.io.InputStreamReader)5 AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription)5 CollectionReaderDescription (org.apache.uima.collection.CollectionReaderDescription)5 ExternalResourceDescription (org.apache.uima.resource.ExternalResourceDescription)5