Search in sources :

Example 6 with Instance

use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.

the class InstanceTest method instanceSetNewFeatureCollectionBySetterTest.

/**
 * Checks that {@code setFeatures} leaves the instance with exactly the two newly supplied
 * features ({@code feature3} and {@code feature4}) after it was constructed with two other
 * features.
 *
 * @throws TextClassificationException
 *             declared by the API under test; not expected to be thrown here
 */
@Test
public void instanceSetNewFeatureCollectionBySetterTest() throws TextClassificationException {
    Feature f1 = new Feature("feature1", "value1", FeatureType.STRING);
    Feature f2 = new Feature("feature2", "value1", FeatureType.STRING);
    List<Feature> features = new ArrayList<>();
    features.add(f1);
    features.add(f2);
    Instance instance = new Instance(features, "outcome");

    Feature f3 = new Feature("feature3", "value1", FeatureType.STRING);
    Feature f4 = new Feature("feature4", "value1", FeatureType.STRING);
    Set<Feature> newFeatures = new HashSet<>();
    newFeatures.add(f3);
    newFeatures.add(f4);
    instance.setFeatures(newFeatures);

    assertEquals(2, instance.getFeatures().size());

    // The features were handed over as a HashSet, so no iteration order is assumed:
    // gather all names first and check membership afterwards.
    Set<String> featureNames = new HashSet<>();
    Iterator<Feature> iterator = instance.getFeatures().iterator();
    while (iterator.hasNext()) {
        featureNames.add(iterator.next().getName());
    }
    if (!featureNames.contains("feature3")) {
        fail("Expected to find a feature named [feature3]");
    }
    if (!featureNames.contains("feature4")) {
        fail("Expected to find a feature named [feature4]");
    }
}
Also used : Instance(org.dkpro.tc.api.features.Instance) ArrayList(java.util.ArrayList) Feature(org.dkpro.tc.api.features.Feature) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 7 with Instance

use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.

the class InstanceTest method instanceAddSingleFeatureTest.

/**
 * Adds one feature to an instance that was created with two features and expects the
 * instance to report three features afterwards.
 */
@Test
public void instanceAddSingleFeatureTest() throws Exception {
    // Instance under test starts out with two features.
    Feature first = new Feature("feature1", "value1", FeatureType.STRING);
    Feature second = new Feature("feature2", "value1", FeatureType.STRING);
    List<Feature> initialFeatures = new ArrayList<>();
    initialFeatures.add(first);
    initialFeatures.add(second);
    Instance underTest = new Instance(initialFeatures, "outcome");

    // Add a single additional feature.
    Feature extra = new Feature("feature3", "value1", FeatureType.STRING);
    underTest.addFeature(extra);

    int featureCount = underTest.getFeatures().size();
    assertEquals(3, featureCount);
}
Also used : Instance(org.dkpro.tc.api.features.Instance) ArrayList(java.util.ArrayList) Feature(org.dkpro.tc.api.features.Feature) Test(org.junit.Test)

Example 8 with Instance

use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.

the class InstanceTest method instanceAddFeatureListTest.

/**
 * Adds a list of two features to an instance that was created with two features and
 * expects the instance to report four features afterwards.
 */
@Test
public void instanceAddFeatureListTest() throws Exception {
    // Instance under test starts out with two features.
    Feature first = new Feature("feature1", "value1", FeatureType.STRING);
    Feature second = new Feature("feature2", "value1", FeatureType.STRING);
    List<Feature> initialFeatures = new ArrayList<>();
    initialFeatures.add(first);
    initialFeatures.add(second);
    Instance underTest = new Instance(initialFeatures, "outcome");

    // Second batch of features, added in one call.
    List<Feature> additionalFeatures = new ArrayList<Feature>();
    Feature third = new Feature("feature3", "value3", FeatureType.STRING);
    Feature fourth = new Feature("feature4", "value4", FeatureType.STRING);
    additionalFeatures.add(third);
    additionalFeatures.add(fourth);
    underTest.addFeatures(additionalFeatures);

    int featureCount = underTest.getFeatures().size();
    assertEquals(4, featureCount);
}
Also used : Instance(org.dkpro.tc.api.features.Instance) ArrayList(java.util.ArrayList) Feature(org.dkpro.tc.api.features.Feature) Test(org.junit.Test)

Example 9 with Instance

use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.

the class InstanceTest method instanceInitializationWithArrayOfOutcomes.

/**
 * Creates an instance with two features and two outcomes passed as separate arguments and
 * expects both the feature count and the outcome count to be two.
 */
@Test
public void instanceInitializationWithArrayOfOutcomes() throws Exception {
    Feature first = new Feature("feature1", "value1", FeatureType.STRING);
    Feature second = new Feature("feature2", "value1", FeatureType.STRING);
    List<Feature> initialFeatures = new ArrayList<>();
    initialFeatures.add(first);
    initialFeatures.add(second);

    // Two outcomes are supplied in the constructor call.
    Instance underTest = new Instance(initialFeatures, "outcome", "outcome2");

    int featureCount = underTest.getFeatures().size();
    assertEquals(2, featureCount);
    int outcomeCount = underTest.getOutcomes().size();
    assertEquals(2, outcomeCount);
}
Also used : Instance(org.dkpro.tc.api.features.Instance) ArrayList(java.util.ArrayList) Feature(org.dkpro.tc.api.features.Feature) Test(org.junit.Test)

Example 10 with Instance

use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.

the class FilterLuceneCharacterNgramStartingWithLetter method applyFilter.

/**
 * Rewrites the given feature file in place, dropping every feature whose name starts with
 * {@code "charngram"} from every serialized instance.
 * <p>
 * Each line of the file is parsed as a JSON array of {@link Instance} objects; the filtered
 * instances are serialized back as one JSON array per line, in the original order.
 *
 * @param inputFeatureFile
 *            the feature file to filter; it is overwritten with the filtered content
 * @throws Exception
 *             if reading, parsing or writing the file fails
 */
@Override
public void applyFilter(File inputFeatureFile) throws Exception {
    Gson gson = new Gson();
    // NOTE: the whole file is read into memory at once. For very large feature files a
    // line-wise streaming approach would be more reasonable.
    List<String> inputLines = FileUtils.readLines(inputFeatureFile, "utf-8");
    List<String> outputLines = new ArrayList<>(inputLines.size());
    for (String line : inputLines) {
        // de-serialize
        Instance[] instances = gson.fromJson(line, Instance[].class);
        for (Instance inst : instances) {
            // Keep every feature that is not a character n-gram feature.
            // NOTE(review): the enclosing class name suggests only n-grams starting with
            // a particular letter should be removed, but this prefix matches all features
            // named "charngram..." - confirm the intended prefix.
            List<Feature> keptFeatures = new ArrayList<>();
            for (Feature f : inst.getFeatures()) {
                if (!f.getName().startsWith("charngram")) {
                    keptFeatures.add(f);
                }
            }
            // update the instance in place; the array keeps the original order
            inst.setFeatures(keptFeatures);
        }
        // re-serialize
        outputLines.add(gson.toJson(instances, Instance[].class));
    }
    // Write the new content to a temporary location first, then overwrite the input file.
    File tmp = File.createTempFile("tmpFeatureFile", "tmp");
    try {
        FileUtils.writeLines(tmp, "utf-8", outputLines);
        FileUtils.copyFile(tmp, inputFeatureFile);
    }
    finally {
        // Remove the temporary file even if writing or copying failed.
        FileUtils.deleteQuietly(tmp);
    }
}
Also used : Instance(org.dkpro.tc.api.features.Instance) ArrayList(java.util.ArrayList) Gson(com.google.gson.Gson) Feature(org.dkpro.tc.api.features.Feature) File(java.io.File)

Aggregations

Instance (org.dkpro.tc.api.features.Instance): 61; ArrayList (java.util.ArrayList): 38; Feature (org.dkpro.tc.api.features.Feature): 30; Test (org.junit.Test): 27; File (java.io.File): 17; AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException): 14; IOException (java.io.IOException): 12; Gson (com.google.gson.Gson): 8; TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException): 8; Attribute (weka.core.Attribute): 8; DenseInstance (weka.core.DenseInstance): 8; Instances (weka.core.Instances): 8; SparseInstance (weka.core.SparseInstance): 8; FeatureExtractorResource_ImplBase (org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase): 6; BufferedReader (java.io.BufferedReader): 5; FileInputStream (java.io.FileInputStream): 5; InputStreamReader (java.io.InputStreamReader): 5; AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription): 5; CollectionReaderDescription (org.apache.uima.collection.CollectionReaderDescription): 5; ExternalResourceDescription (org.apache.uima.resource.ExternalResourceDescription): 5