Search in sources :

Example 41 with Instance

use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.

the class InstanceTest method instanceInitializationWithListOfOutcomes.

/**
 * Builds an {@link Instance} from a list of two features and a list of two outcomes and
 * checks that both lists are reported with their full size.
 */
@Test
public void instanceInitializationWithListOfOutcomes() throws Exception {
    // Two string-typed features that share the same value but differ in name.
    List<Feature> featureList = new ArrayList<>();
    featureList.add(new Feature("feature1", "value1", FeatureType.STRING));
    featureList.add(new Feature("feature2", "value1", FeatureType.STRING));

    List<String> outcomeList = new ArrayList<>();
    outcomeList.add("outcome1");
    outcomeList.add("outcome2");

    Instance underTest = new Instance(featureList, outcomeList);

    int featureCount = underTest.getFeatures().size();
    assertEquals(2, featureCount);
    int outcomeCount = underTest.getOutcomes().size();
    assertEquals(2, outcomeCount);
}
Also used : Instance(org.dkpro.tc.api.features.Instance) ArrayList(java.util.ArrayList) Feature(org.dkpro.tc.api.features.Feature) Test(org.junit.Test)

Example 42 with Instance

use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.

the class ExtractFeaturesConnectorTest method extractFeaturesConnectorRegressionTest.

/**
 * Runs a reader / segmenter / document-mode annotator / feature-extractor-connector pipeline
 * configured with {@code Constants.LM_REGRESSION} and {@code Constants.FM_DOCUMENT}, then
 * parses the JSON file written into the output folder and checks the resulting instances.
 */
@Test
public void extractFeaturesConnectorRegressionTest() throws Exception {
    File outputPath = folder.newFolder();

    // The extractor does not strictly need parameters, but they are supplied anyway.
    Object[] extractorParams = new Object[] { NoopFeatureExtractor.PARAM_UNIQUE_EXTRACTOR_NAME, "123", UnitContextMetaCollector.PARAM_CONTEXT_FOLDER, Constants.ID_CONTEXT_KEY };
    ExternalResourceDescription noopExtractor = ExternalResourceFactory.createExternalResourceDescription(NoopFeatureExtractor.class, extractorParams);
    List<ExternalResourceDescription> extractorList = new ArrayList<>();
    extractorList.add(noopExtractor);

    // Pipeline components, created in the order in which they are run.
    CollectionReaderDescription readerDesc = CollectionReaderFactory.createReaderDescription(TestReaderRegression.class, TestReaderRegression.PARAM_SOURCE_LOCATION, "src/test/resources/data/*.txt");
    AnalysisEngineDescription segmenterDesc = AnalysisEngineFactory.createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngineDescription docModeDesc = AnalysisEngineFactory.createEngineDescription(DocumentModeAnnotator.class, DocumentModeAnnotator.PARAM_FEATURE_MODE, Constants.FM_DOCUMENT);
    AnalysisEngineDescription connectorDesc = TaskUtils.getFeatureExtractorConnector(outputPath.getAbsolutePath(), JsonDataWriter.class.getName(), Constants.LM_REGRESSION, Constants.FM_DOCUMENT, false, false, false, false, Collections.emptyList(), extractorList, new String[] {});
    SimplePipeline.runPipeline(readerDesc, segmenterDesc, docModeDesc, connectorDesc);

    // Each line of the written file is parsed as one JSON-serialized Instance.
    File jsonFile = new File(outputPath, JsonDataWriter.JSON_FILE_NAME);
    Gson jsonParser = new Gson();
    List<Instance> parsed = new ArrayList<>();
    for (String jsonLine : FileUtils.readLines(jsonFile, "utf-8")) {
        Instance current = jsonParser.fromJson(jsonLine, Instance.class);
        parsed.add(current);
    }

    assertEquals(2, parsed.size());
    assertEquals(1, getUniqueOutcomes(parsed));
    assertEquals("0.45", parsed.get(0).getOutcome());

    // Dump the raw file content to stdout.
    String rawContent = FileUtils.readFileToString(new File(outputPath, JsonDataWriter.JSON_FILE_NAME), "utf-8");
    System.out.println(rawContent);
}
Also used : JsonDataWriter(org.dkpro.tc.core.io.JsonDataWriter) Instance(org.dkpro.tc.api.features.Instance) ArrayList(java.util.ArrayList) Gson(com.google.gson.Gson) CollectionReaderDescription(org.apache.uima.collection.CollectionReaderDescription) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) File(java.io.File) ExternalResourceDescription(org.apache.uima.resource.ExternalResourceDescription) Test(org.junit.Test)

Example 43 with Instance

use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.

the class WordNGramTest method evaluateExtractedFeatures.

/**
 * Reads the instances from {@code output} and verifies the instance count, the number of
 * unique outcomes, and the exact set of feature names that were extracted.
 *
 * @param output location handed to {@code readInstances}
 * @throws Exception if reading the instances fails
 */
@Override
protected void evaluateExtractedFeatures(File output) throws Exception {
    List<Instance> extracted = readInstances(output);
    assertEquals(4, extracted.size());
    assertEquals(1, getUniqueOutcomes(extracted));

    // Gather the distinct feature names across all instances.
    Set<String> distinctNames = new HashSet<>();
    for (Instance instance : extracted) {
        for (Feature feature : instance.getFeatures()) {
            String name = feature.getName();
            distinctNames.add(name);
        }
    }

    assertEquals(3, distinctNames.size());
    assertTrue(distinctNames.contains("ngram_4"));
    assertTrue(distinctNames.contains("ngram_5"));
    assertTrue(distinctNames.contains("ngram_5_5"));
}
Also used : Instance(org.dkpro.tc.api.features.Instance) Feature(org.dkpro.tc.api.features.Feature) HashSet(java.util.HashSet)

Example 44 with Instance

use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.

the class LibsvmDataFormatLoadModelConnector method createInputFile.

/**
 * Writes the instances extracted from {@code jcas} into a temporary text file and returns it.
 *
 * <p>Each instance becomes one line: {@code OUTCOME_PLACEHOLDER}, followed by the result of
 * {@code injectSequenceId(instance)}, followed by one tab-prefixed {@code key:value} entry per
 * feature, where the key is looked up in {@code featureMapping} by feature name. Features for
 * which {@code sanityCheckValue} returns {@code false} are skipped.
 *
 * <p>The file is registered for deletion on JVM exit. The writer is managed by
 * try-with-resources so that it is closed even when extraction or writing fails.
 *
 * @param jcas the CAS the instances are extracted from
 * @return the temporary file that was written
 * @throws Exception if creating the file, extracting instances, or writing fails
 */
private File createInputFile(JCas jcas) throws Exception {
    File tempFile = FileUtil.createTempFile("libsvm", ".txt");
    tempFile.deleteOnExit();
    // try-with-resources: previously the writer leaked if anything below threw.
    try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(tempFile), "utf-8"))) {
        InstanceExtractor extractor = new InstanceExtractor(featureMode, featureExtractors, true);
        List<Instance> instances = extractor.getInstances(jcas, true);
        for (Instance instance : instances) {
            bw.write(OUTCOME_PLACEHOLDER);
            bw.write(injectSequenceId(instance));
            for (Feature f : instance.getFeatures()) {
                if (!sanityCheckValue(f)) {
                    continue;
                }
                bw.write("\t");
                bw.write(featureMapping.get(f.getName()) + ":" + f.getValue());
            }
            bw.write("\n");
        }
    }
    return tempFile;
}
Also used : Instance(org.dkpro.tc.api.features.Instance) FileOutputStream(java.io.FileOutputStream) OutputStreamWriter(java.io.OutputStreamWriter) File(java.io.File) InstanceExtractor(org.dkpro.tc.core.task.uima.InstanceExtractor) Feature(org.dkpro.tc.api.features.Feature) BufferedWriter(java.io.BufferedWriter)

Example 45 with Instance

use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.

the class CrfSuiteDataWriter method writeGenericFormat.

/**
 * Serializes all given instances as a single JSON array on one line, writes that line through
 * the writer field {@code bw}, then closes the writer and clears the field.
 *
 * @param instances the instances to write
 * @throws AnalysisEngineProcessException wrapping any exception raised while initializing,
 *         serializing, writing or closing
 */
@Override
public void writeGenericFormat(Collection<Instance> instances) throws AnalysisEngineProcessException {
    try {
        initGeneric();
        // Bulk-write: in sequence mode this keeps the instances that belong to the same
        // sequence together.
        final Instance[] batch = instances.toArray(new Instance[0]);
        final String jsonLine = gson.toJson(batch) + "\n";
        bw.write(jsonLine);
        bw.close();
        // Clear the field so the closed writer is not referenced any longer.
        bw = null;
    } catch (Exception cause) {
        throw new AnalysisEngineProcessException(cause);
    }
}
Also used : Instance(org.dkpro.tc.api.features.Instance) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException) IOException(java.io.IOException) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException)

Aggregations

Instance (org.dkpro.tc.api.features.Instance): 61; ArrayList (java.util.ArrayList): 38; Feature (org.dkpro.tc.api.features.Feature): 30; Test (org.junit.Test): 27; File (java.io.File): 17; AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException): 14; IOException (java.io.IOException): 12; Gson (com.google.gson.Gson): 8; TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException): 8; Attribute (weka.core.Attribute): 8; DenseInstance (weka.core.DenseInstance): 8; Instances (weka.core.Instances): 8; SparseInstance (weka.core.SparseInstance): 8; FeatureExtractorResource_ImplBase (org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase): 6; BufferedReader (java.io.BufferedReader): 5; FileInputStream (java.io.FileInputStream): 5; InputStreamReader (java.io.InputStreamReader): 5; AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription): 5; CollectionReaderDescription (org.apache.uima.collection.CollectionReaderDescription): 5; ExternalResourceDescription (org.apache.uima.resource.ExternalResourceDescription): 5