use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class CrfSuiteFeatureFormatExtractionIteratorTest method buildFeatures.
/**
 * Builds the test fixture stored in {@code fs}: five instances spread over two sequences.
 * <p>
 * Sequence 0 holds positions 0..2 and sequence 1 holds positions 0..1. Every instance is
 * explicitly assigned JCas id 0. Two feature lists are reused across the instances; the second
 * list deliberately adds "feature2" before "feature1", so the two lists differ in feature order.
 *
 * @throws Exception
 *             if constructing a feature or an instance fails
 */
private void buildFeatures() throws Exception {
    fs = new ArrayList<>();

    List<Feature> features1 = new ArrayList<Feature>();
    features1.add(new Feature("feature1", 1.0, FeatureType.NUMERIC));
    features1.add(new Feature("feature2", 0.0, FeatureType.NUMERIC));
    features1.add(new Feature("feature3", "Water", FeatureType.STRING));

    List<Feature> features2 = new ArrayList<Feature>();
    features2.add(new Feature("feature2", 0.5, FeatureType.NUMERIC));
    features2.add(new Feature("feature1", 0.5, FeatureType.NUMERIC));
    features2.add(new Feature("feature3", "Fanta", FeatureType.STRING));

    // Sequence 0
    Instance instance1 = new Instance(features1, "1");
    instance1.setJcasId(0);
    instance1.setSequenceId(0);
    instance1.setSequencePosition(0);

    // The JCas id is set on the instance just created (previously instance1 was re-set here).
    Instance instance2 = new Instance(features2, "2");
    instance2.setJcasId(0);
    instance2.setSequenceId(0);
    instance2.setSequencePosition(1);

    Instance instance3 = new Instance(features1, "3");
    instance3.setJcasId(0);
    instance3.setSequenceId(0);
    instance3.setSequencePosition(2);

    // Sequence 1
    Instance instance4 = new Instance(features1, "4");
    instance4.setJcasId(0);
    instance4.setSequenceId(1);
    instance4.setSequencePosition(0);

    Instance instance5 = new Instance(features2, "4");
    instance5.setJcasId(0);
    instance5.setSequenceId(1);
    instance5.setSequencePosition(1);

    fs.add(instance1);
    fs.add(instance2);
    fs.add(instance3);
    fs.add(instance4);
    fs.add(instance5);
}
use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class CrfSuiteLoadModelConnector method getInstancesInSequence.
/**
 * Creates one {@link Instance} for every {@link TextClassificationTarget} covered by the given
 * sequence annotation.
 * <p>
 * Each instance receives the features produced by all feature extractors, the outcomes returned
 * by {@code getOutcomes}, the id of the JCas, the supplied sequence id and the target's id as
 * its sequence position.
 *
 * @param featureExtractors
 *            resources that are cast to {@link FeatureExtractor} and run on every target
 * @param jcas
 *            the CAS holding the sequence, its targets and a single {@link JCasId}
 * @param sequence
 *            the sequence annotation whose covered targets are processed
 * @param addInstanceId
 *            whether to add the feature returned by {@code InstanceIdFeature.retrieve} first
 * @param sequenceId
 *            id stored on every instance created for this sequence
 * @return the instances in the order in which the covered targets were returned
 * @throws Exception
 *             an {@link AnalysisEngineProcessException} wrapping any
 *             {@link TextClassificationException} raised during feature extraction; other
 *             failures propagate unchanged
 */
private List<Instance> getInstancesInSequence(FeatureExtractorResource_ImplBase[] featureExtractors, JCas jcas, TextClassificationSequence sequence, boolean addInstanceId, int sequenceId) throws Exception {
    List<Instance> result = new ArrayList<>();
    int idOfJcas = JCasUtil.selectSingle(jcas, JCasId.class).getId();

    for (TextClassificationTarget target : JCasUtil.selectCovered(jcas, TextClassificationTarget.class, sequence)) {
        Instance current = new Instance();

        if (addInstanceId) {
            current.addFeature(InstanceIdFeature.retrieve(jcas, target, sequenceId));
        }

        // Run every extractor on the target; extraction failures surface as UIMA process errors.
        for (FeatureExtractorResource_ImplBase resource : featureExtractors) {
            FeatureExtractor extractor = (FeatureExtractor) resource;
            try {
                current.addFeatures(extractor.extract(jcas, target));
            } catch (TextClassificationException e) {
                throw new AnalysisEngineProcessException(e);
            }
        }

        // Attach outcome label(s) and the bookkeeping ids.
        current.setOutcomes(getOutcomes(jcas, target));
        current.setJcasId(idOfJcas);
        current.setSequenceId(sequenceId);
        current.setSequencePosition(target.getId());

        result.add(current);
    }

    return result;
}
use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class CrfSuiteLoadModelConnector method process.
/**
 * Extracts instances for every {@link TextClassificationSequence} in the CAS, converts them with
 * a {@link CrfSuiteFeatureFormatExtractionIterator}, feeds the result to the external command in
 * batches of at most 5000 iterator items and hands the concatenated command output to
 * {@code setPredictedOutcome}.
 *
 * @param jcas
 *            the CAS to classify
 * @throws AnalysisEngineProcessException
 *             wrapping any exception raised while extracting, running the command or storing
 *             the predictions
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    try {
        // Sequences are numbered consecutively in the order they are selected from the CAS.
        List<Instance> allInstances = new ArrayList<>();
        int nextSequenceId = 0;
        for (TextClassificationSequence sequence : JCasUtil.select(jcas, TextClassificationSequence.class)) {
            allInstances.addAll(getInstancesInSequence(featureExtractors, jcas, sequence, true, nextSequenceId));
            nextSequenceId++;
        }

        CrfSuiteFeatureFormatExtractionIterator sequences = new CrfSuiteFeatureFormatExtractionIterator(allInstances);

        // Classify in batches; the output of all batches is accumulated in memory.
        final int batchSize = 5000;
        StringBuilder predictions = new StringBuilder();
        while (sequences.hasNext()) {
            StringBuilder batch = new StringBuilder();
            for (int taken = 0; taken < batchSize && sequences.hasNext(); taken++) {
                batch.append(sequences.next());
            }
            predictions.append(runCommand(buildCommand(), batch.toString()));
        }

        setPredictedOutcome(jcas, predictions.toString());
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
}
use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class CrfSuiteFeatureFormatExtractionIterator method next.
/**
 * Serializes the next run of instances that share the same id (as returned by {@code getId})
 * into tab-separated text, starting at the current {@code insIdx} cursor.
 * <p>
 * Each instance becomes one line: the label-substituted outcome, a tab, then the features as
 * {@code name=value} pairs separated by tabs. The first line written by a call gets an extra
 * {@code __BOS__} column whenever its id differs from {@code idInitVal}. Lines within the run
 * are separated by a newline; after the last line of the run {@code appendEOS} is invoked
 * (presumably writing an end-of-sequence marker - the helper is not visible here).
 * <p>
 * The cursor {@code insIdx} is left pointing at the first instance that was not consumed. If
 * the cursor is already at or past the end of {@code instances}, an empty builder is returned.
 *
 * @return the serialized run of instances
 * @throws UnsupportedOperationException
 *             wrapping any exception raised while building the text
 */
@Override
public StringBuilder next() {
StringBuilder sb = new StringBuilder();
try {
// Reset on every call, so the first instance of this call is compared against idInitVal.
String lastSeenSeqId = idInitVal;
boolean seqIdChanged = false;
for (; insIdx < instances.size(); insIdx++) {
Instance i = instances.get(insIdx);
String id = getId(i);
if (!lastSeenSeqId.equals(id)) {
seqIdChanged = true;
lastSeenSeqId = getId(i);
}
// Outcome column first, then the feature columns.
sb.append(LabelSubstitutor.labelReplacement(i.getOutcome()));
sb.append("\t");
// idx counts the features written so far, so no tab is emitted after the last feature.
int idx = 0;
for (Feature f : i.getFeatures()) {
sb.append(f.getName() + "=" + f.getValue());
if (idx + 1 < i.getFeatures().size()) {
sb.append("\t");
}
idx++;
}
// Mark first line of new sequence with an additional __BOS__
if (seqIdChanged) {
sb.append("\t");
sb.append("__BOS__");
seqIdChanged = false;
}
// Peek ahead - seqEnd reached?
if (insIdx + 1 < instances.size()) {
Instance next = instances.get(insIdx + 1);
String nextId = getId(next);
if (!lastSeenSeqId.equals(nextId)) {
appendEOS(sb);
// break skips the loop's own increment, so advance past the consumed instance here.
insIdx++;
break;
}
} else if (insIdx + 1 == instances.size()) {
// Current instance is the last one overall: close the run and move the cursor to the end.
appendEOS(sb);
insIdx++;
// We're done
break;
}
// Only reached when the following instance has the same id as the current one.
sb.append("\n");
}
} catch (Exception e) {
throw new UnsupportedOperationException(e);
}
return sb;
}
use of org.dkpro.tc.api.features.Instance in project dkpro-tc by dkpro.
the class CrfSuiteDataWriterTest method prepareFeatures.
/**
 * Fills {@code instances} with five instances grouped into two sequences: sequence 0 with
 * positions 0..2 and sequence 1 with positions 0..1.
 * <p>
 * Two feature lists are shared between the instances. The second list adds "feature2" before
 * "feature1", so the two lists differ in feature order.
 *
 * @throws Exception
 *             if constructing a feature or an instance fails
 */
private void prepareFeatures() throws Exception {
    List<Feature> waterFeatures = new ArrayList<>();
    waterFeatures.add(new Feature("feature1", 1.0, FeatureType.NUMERIC));
    waterFeatures.add(new Feature("feature2", 0.0, FeatureType.NUMERIC));
    waterFeatures.add(new Feature("feature3", "Water", FeatureType.STRING));

    List<Feature> fantaFeatures = new ArrayList<>();
    fantaFeatures.add(new Feature("feature2", 0.5, FeatureType.NUMERIC));
    fantaFeatures.add(new Feature("feature1", 0.5, FeatureType.NUMERIC));
    fantaFeatures.add(new Feature("feature3", "Fanta", FeatureType.STRING));

    // Sequence 0: three consecutive positions.
    Instance seq0First = new Instance(waterFeatures, "1");
    seq0First.setSequenceId(0);
    seq0First.setSequencePosition(0);

    Instance seq0Second = new Instance(fantaFeatures, "2");
    seq0Second.setSequenceId(0);
    seq0Second.setSequencePosition(1);

    Instance seq0Third = new Instance(waterFeatures, "3");
    seq0Third.setSequenceId(0);
    seq0Third.setSequencePosition(2);

    // Sequence 1: two consecutive positions.
    Instance seq1First = new Instance(waterFeatures, "4");
    seq1First.setSequenceId(1);
    seq1First.setSequencePosition(0);

    Instance seq1Second = new Instance(fantaFeatures, "4");
    seq1Second.setSequenceId(1);
    seq1Second.setSequencePosition(1);

    instances.add(seq0First);
    instances.add(seq0Second);
    instances.add(seq0Third);
    instances.add(seq1First);
    instances.add(seq1Second);
}
Aggregations