use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class InstanceTest method instanceInitializationWithListOfOutcomes.
/**
 * Builds an {@link Instance} from a list of two features and a list of two
 * outcomes and checks that both collections are reported with size two.
 */
@Test
public void instanceInitializationWithListOfOutcomes() throws Exception {
    // Two string-typed features that share a value but differ in name.
    List<Feature> featureList = new ArrayList<>();
    featureList.add(new Feature("feature1", "value1", FeatureType.STRING));
    featureList.add(new Feature("feature2", "value1", FeatureType.STRING));

    List<String> outcomeList = new ArrayList<>();
    outcomeList.add("outcome1");
    outcomeList.add("outcome2");

    Instance created = new Instance(featureList, outcomeList);

    assertEquals(2, created.getFeatures().size());
    assertEquals(2, created.getOutcomes().size());
}
use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class ExtractFeaturesConnectorTest method extractFeaturesConnectorRegressionTest.
/**
 * Runs a reader / segmenter / document-mode annotator / feature-extractor
 * connector pipeline in regression mode, then reads the JSON file written by
 * {@link JsonDataWriter} back into {@link Instance} objects and checks the
 * number of instances, the number of unique outcomes and the first outcome.
 */
@Test
public void extractFeaturesConnectorRegressionTest() throws Exception {
    File outputPath = folder.newFolder();

    // The no-op extractor does not strictly require parameters; they are
    // supplied anyway in case that changes.
    Object[] extractorParams = new Object[] {
            NoopFeatureExtractor.PARAM_UNIQUE_EXTRACTOR_NAME, "123",
            UnitContextMetaCollector.PARAM_CONTEXT_FOLDER, Constants.ID_CONTEXT_KEY };
    List<ExternalResourceDescription> extractors = new ArrayList<>();
    extractors.add(ExternalResourceFactory
            .createExternalResourceDescription(NoopFeatureExtractor.class, extractorParams));

    CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(
            TestReaderRegression.class,
            TestReaderRegression.PARAM_SOURCE_LOCATION, "src/test/resources/data/*.txt");
    AnalysisEngineDescription segmenter = AnalysisEngineFactory
            .createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngineDescription documentMode = AnalysisEngineFactory.createEngineDescription(
            DocumentModeAnnotator.class,
            DocumentModeAnnotator.PARAM_FEATURE_MODE, Constants.FM_DOCUMENT);
    AnalysisEngineDescription connector = TaskUtils.getFeatureExtractorConnector(
            outputPath.getAbsolutePath(), JsonDataWriter.class.getName(),
            Constants.LM_REGRESSION, Constants.FM_DOCUMENT,
            false, false, false, false,
            Collections.emptyList(), extractors, new String[] {});

    SimplePipeline.runPipeline(reader, segmenter, documentMode, connector);

    // Each line of the written file is deserialized into one Instance.
    File jsonFile = new File(outputPath, JsonDataWriter.JSON_FILE_NAME);
    Gson gson = new Gson();
    List<Instance> instances = new ArrayList<>();
    for (String jsonLine : FileUtils.readLines(jsonFile, "utf-8")) {
        instances.add(gson.fromJson(jsonLine, Instance.class));
    }

    assertEquals(2, instances.size());
    assertEquals(1, getUniqueOutcomes(instances));
    assertEquals("0.45", instances.get(0).getOutcome());

    // Dump the raw file content to stdout for manual inspection.
    System.out.println(FileUtils.readFileToString(jsonFile, "utf-8"));
}
use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class WordNGramTest method evaluateExtractedFeatures.
/**
 * Checks the instances read from {@code output}: four instances, a single
 * unique outcome, and exactly three distinct feature names
 * ({@code ngram_4}, {@code ngram_5}, {@code ngram_5_5}).
 *
 * @param output
 *            location handed to {@code readInstances} to load the instances
 */
@Override
protected void evaluateExtractedFeatures(File output) throws Exception {
    List<Instance> extracted = readInstances(output);
    assertEquals(4, extracted.size());
    assertEquals(1, getUniqueOutcomes(extracted));

    // Gather the distinct feature names across all instances.
    Set<String> distinctNames = new HashSet<String>();
    for (Instance instance : extracted) {
        for (Feature feature : instance.getFeatures()) {
            distinctNames.add(feature.getName());
        }
    }

    assertEquals(3, distinctNames.size());
    String[] expectedNames = { "ngram_4", "ngram_5", "ngram_5_5" };
    for (String expected : expectedNames) {
        assertTrue(distinctNames.contains(expected));
    }
}
use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class LibsvmDataFormatLoadModelConnector method createInputFile.
/**
 * Extracts the instances of the given CAS and writes them to a temporary file,
 * one line per instance.
 * <p>
 * Each line starts with {@code OUTCOME_PLACEHOLDER} followed by the result of
 * {@code injectSequenceId(instance)}. Every feature that passes
 * {@code sanityCheckValue} is appended as a tab-separated
 * {@code <mapped id>:<value>} pair, where the id is looked up in
 * {@code featureMapping} by feature name. Features failing the check are
 * skipped.
 * <p>
 * The writer is managed by try-with-resources so the file handle is released
 * even when writing fails part-way through.
 *
 * @param jcas
 *            the CAS from which the instances are extracted
 * @return the temporary file (marked delete-on-exit) holding the written lines
 * @throws Exception
 *             if instance extraction or writing the file fails
 */
private File createInputFile(JCas jcas) throws Exception {
    File tempFile = FileUtil.createTempFile("libsvm", ".txt");
    tempFile.deleteOnExit();

    // Extract before opening the writer so a failing extraction cannot leave
    // an open stream behind.
    InstanceExtractor extractor = new InstanceExtractor(featureMode, featureExtractors, true);
    List<Instance> instances = extractor.getInstances(jcas, true);

    try (BufferedWriter bw = new BufferedWriter(
            new OutputStreamWriter(new FileOutputStream(tempFile), "utf-8"))) {
        for (Instance instance : instances) {
            bw.write(OUTCOME_PLACEHOLDER);
            bw.write(injectSequenceId(instance));
            for (Feature f : instance.getFeatures()) {
                if (!sanityCheckValue(f)) {
                    continue;
                }
                bw.write("\t");
                bw.write(featureMapping.get(f.getName()) + ":" + f.getValue());
            }
            bw.write("\n");
        }
    }
    return tempFile;
}
use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class CrfSuiteDataWriter method writeGenericFormat.
/**
 * Serializes all given instances as a single JSON array and writes it as one
 * line to the writer prepared by {@code initGeneric()}.
 * <p>
 * The writer is closed and the {@code bw} field reset to {@code null} whether
 * or not writing succeeds, so a failed write neither leaks the underlying
 * handle nor leaves a stale writer behind for later calls.
 *
 * @param instances
 *            the instances to write in one bulk operation
 * @throws AnalysisEngineProcessException
 *             wrapping any exception raised while initializing, writing or
 *             closing the writer
 */
@Override
public void writeGenericFormat(Collection<Instance> instances) throws AnalysisEngineProcessException {
    try {
        initGeneric();
        // NOTE(review): assumes bw is a java.io.Writer (hence AutoCloseable) - confirm
        // against the field declaration. The resource variable exists only so
        // that close() is guaranteed and a close failure is recorded as a
        // suppressed exception instead of masking the write failure.
        try (AutoCloseable writer = bw) {
            // bulk-write - in sequence mode this keeps the instances that
            // belong to the same sequence together
            Instance[] array = instances.toArray(new Instance[0]);
            bw.write(gson.toJson(array) + "\n");
        } finally {
            bw = null;
        }
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
}
Aggregations