use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class LibsvmDataFormatWriter method writeClassifierFormat.
/**
 * Writes the given instances to {@code bw}, one line per instance, and afterwards stores the
 * instance-id, feature-name and outcome mappings in {@code outputDirectory}.
 *
 * <p>Each line has the layout
 * {@code <outcome> TAB <injectSequenceId(instance)> <id>:<value> TAB <id>:<value> ... NEWLINE},
 * where the {@code id:value} pairs are sorted by ascending feature id and features whose
 * absolute value is below {@code 1e-8} are omitted. In regression mode the raw outcome of the
 * instance is written; otherwise the value stored for that outcome in {@code outcomeMap}.
 *
 * <p>The writer is always closed and reset to {@code null} when this method returns, whether it
 * succeeded or failed.
 *
 * @param in
 *            the instances to write
 * @throws AnalysisEngineProcessException
 *             wrapping any exception raised while building or writing the output, including a
 *             failed flush of the buffered output and a feature name without a mapped id
 */
@Override
public void writeClassifierFormat(Collection<Instance> in) throws AnalysisEngineProcessException {
    try {
        if (featureNames2id == null) {
            createFeatureNameMap();
        }
        initClassifierFormat();

        for (Instance instance : in) {
            Map<Integer, Double> entry = new HashMap<>();
            recordInstanceId(instance, maxId++, index2instanceId);

            for (Feature f : instance.getFeatures()) {
                Integer id = featureNames2id.get(f.getName());
                Double val = toValue(f.getValue());
                if (Math.abs(val) < 0.00000001) {
                    // skip zero values
                    continue;
                }
                if (id == null) {
                    // Fail with a descriptive message instead of a NullPointerException
                    // further down when the null key is sorted or printed.
                    throw new IllegalStateException(
                            "No feature id is mapped for feature name [" + f.getName() + "]");
                }
                entry.put(id, val);
            }

            // The pairs are emitted in ascending order of the feature id.
            List<Integer> keys = new ArrayList<Integer>(entry.keySet());
            Collections.sort(keys);

            if (isRegression()) {
                bw.append(instance.getOutcome() + "\t");
            } else {
                bw.append(outcomeMap.get(instance.getOutcome()) + "\t");
            }
            bw.append(injectSequenceId(instance));

            for (int i = 0; i < keys.size(); i++) {
                Integer key = keys.get(i);
                bw.append(key.toString()).append(":").append(entry.get(key).toString());
                if (i + 1 < keys.size()) {
                    bw.append("\t");
                }
            }
            bw.append("\n");
        }

        // Flush explicitly: the quiet close in the finally block would otherwise swallow an
        // I/O error raised while the buffered content is written out.
        if (bw != null) {
            bw.flush();
        }

        writeMapping(outputDirectory, INDEX2INSTANCEID, index2instanceId);
        writeFeatureName2idMapping(outputDirectory, AdapterFormat.getFeatureNameMappingFilename(), featureNames2id);
        writeOutcomeMapping(outputDirectory, AdapterFormat.getOutcomeMappingFilename(), outcomeMap);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    } finally {
        IOUtils.closeQuietly(bw);
        // important, we reopen the stream only if the pointer is null!
        bw = null;
    }
}
use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class LibsvmDataFormatWriter method transformFromGeneric.
/**
 * Converts the generic feature file in {@code outputDirectory} into the classifier format.
 *
 * <p>The file is read line by line as UTF-8; every line is deserialised with {@code gson} into an
 * {@code Instance[]} and handed to {@link #writeClassifierFormat(Collection)}. After all lines
 * were processed successfully the generic file is deleted. If processing fails, the reader is
 * still closed and the generic file is left in place.
 *
 * @throws Exception
 *             if the generic file cannot be read or a line cannot be converted
 */
@Override
public void transformFromGeneric() throws Exception {
    File genericFile = new File(outputDirectory, Constants.GENERIC_FEATURE_FILE);

    // try-with-resources guarantees the reader is released even when a line fails to convert
    try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(new FileInputStream(genericFile), "utf-8"))) {
        String line;
        while ((line = reader.readLine()) != null) {
            Instance[] instance = gson.fromJson(line, Instance[].class);
            List<Instance> ins = new ArrayList<>(Arrays.asList(instance));
            writeClassifierFormat(ins);
        }
    }

    FileUtils.deleteQuietly(genericFile);
}
use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class CrfSuiteDataWriter method writeClassifierFormat.
/**
 * Writes the given instances to {@code bw}: every element produced by a
 * {@code CrfSuiteFeatureFormatExtractionIterator} over the instances is written followed by a
 * newline.
 *
 * <p>The writer is closed and reset to {@code null} on success and on failure, so a failed call
 * does not leave an open, stale writer behind.
 *
 * @param instances
 *            the instances to write
 * @throws AnalysisEngineProcessException
 *             wrapping any exception raised while initialising, writing or closing the output
 */
@Override
public void writeClassifierFormat(Collection<Instance> instances) throws AnalysisEngineProcessException {
    try {
        initClassifierFormat();
        Iterator<StringBuilder> sequenceIterator = new CrfSuiteFeatureFormatExtractionIterator(
                new ArrayList<Instance>(instances));
        while (sequenceIterator.hasNext()) {
            String features = sequenceIterator.next().toString();
            bw.write(features);
            bw.write("\n");
        }
        // Close on the success path inside the try so that a flush/close error is reported.
        bw.close();
        bw = null;
    } catch (Exception e) {
        // Release the writer on failure as well; a secondary close error is attached to the
        // original exception instead of replacing it.
        if (bw != null) {
            try {
                bw.close();
            } catch (Exception closeFailure) {
                e.addSuppressed(closeFailure);
            }
            bw = null;
        }
        throw new AnalysisEngineProcessException(e);
    }
}
use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class CrfSuiteDataWriter method transformFromGeneric.
/**
 * Converts the generic feature file in {@code outputDirectory} into
 * {@code classifierFormatOutputFile}.
 *
 * <p>The generic file is read line by line as UTF-8; every line is deserialised with {@code gson}
 * into an {@code Instance[]}, and every element produced by a
 * {@code CrfSuiteFeatureFormatExtractionIterator} over those instances is written as one UTF-8
 * line to the output file. Reader and writer are closed even if the conversion fails.
 *
 * @throws Exception
 *             if the generic file cannot be read, a line cannot be converted, or the output
 *             file cannot be written
 */
@Override
public void transformFromGeneric() throws Exception {
    File genericFile = new File(outputDirectory, Constants.GENERIC_FEATURE_FILE);

    // The reader is opened first, so a missing input fails before the output file is touched.
    try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(genericFile), "utf-8"));
            BufferedWriter writer = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(classifierFormatOutputFile), "utf-8"))) {
        String line;
        while ((line = reader.readLine()) != null) {
            Instance[] instance = gson.fromJson(line, Instance[].class);
            List<Instance> ins = new ArrayList<>(Arrays.asList(instance));
            Iterator<StringBuilder> sequenceIterator = new CrfSuiteFeatureFormatExtractionIterator(ins);
            while (sequenceIterator.hasNext()) {
                String features = sequenceIterator.next().toString();
                writer.write(features);
                writer.write("\n");
            }
        }
    }
}
use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class ExtractFeaturesConnectorTest method extractFeaturesConnectorSingleLabelTest.
/**
 * Runs a pipeline of {@code TestReaderSingleLabel}, {@code BreakIteratorSegmenter},
 * {@code DocumentModeAnnotator} and the feature-extractor connector (configured with
 * {@code JsonDataWriter}, {@code Constants.LM_REGRESSION} and {@code Constants.FM_DOCUMENT}),
 * then parses every line of the written JSON file into an {@code Instance} and expects two
 * instances sharing one unique outcome.
 */
@Test
public void extractFeaturesConnectorSingleLabelTest() throws Exception {
    File targetDir = folder.newFolder();

    // The extractor needs no real configuration; the array is kept so parameters can be added.
    Object[] extractorParams = new Object[] { NoopFeatureExtractor.PARAM_UNIQUE_EXTRACTOR_NAME, "123" };
    ExternalResourceDescription noopExtractor = ExternalResourceFactory
            .createExternalResourceDescription(NoopFeatureExtractor.class, extractorParams);
    List<ExternalResourceDescription> extractors = new ArrayList<>();
    extractors.add(noopExtractor);

    CollectionReaderDescription readerDesc = CollectionReaderFactory.createReaderDescription(
            TestReaderSingleLabel.class,
            TestReaderSingleLabel.PARAM_SOURCE_LOCATION, "src/test/resources/data/*.txt");
    AnalysisEngineDescription segmenterDesc = AnalysisEngineFactory
            .createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngineDescription docModeDesc = AnalysisEngineFactory.createEngineDescription(
            DocumentModeAnnotator.class,
            DocumentModeAnnotator.PARAM_FEATURE_MODE, Constants.FM_DOCUMENT);
    AnalysisEngineDescription connectorDesc = TaskUtils.getFeatureExtractorConnector(
            targetDir.getAbsolutePath(), JsonDataWriter.class.getName(), Constants.LM_REGRESSION,
            Constants.FM_DOCUMENT, false, false, false, false, Collections.emptyList(), extractors,
            new String[] {});

    SimplePipeline.runPipeline(readerDesc, segmenterDesc, docModeDesc, connectorDesc);

    File jsonFile = new File(targetDir, JsonDataWriter.JSON_FILE_NAME);

    // Dump the raw output to the console before parsing it.
    System.out.println(FileUtils.readFileToString(jsonFile, "utf-8"));

    Gson parser = new Gson();
    List<Instance> parsed = new ArrayList<>();
    for (String jsonLine : FileUtils.readLines(jsonFile, "utf-8")) {
        Instance current = parser.fromJson(jsonLine, Instance.class);
        parsed.add(current);
    }

    assertEquals(2, parsed.size());
    assertEquals(1, getUniqueOutcomes(parsed));
}
Aggregations