Use of org.dkpro.tc.api.features.FeatureExtractor in project dkpro-tc by dkpro.
Class InstanceExtractor, method getUnitInstances.
/**
 * Builds one {@link Instance} per {@link TextClassificationTarget} annotation found in the JCas.
 *
 * <p>Each instance receives the features of every configured feature extractor, plus the
 * instance-id feature when {@code addInstanceId} is set. It is then given its outcomes, weight,
 * the id of the JCas, and the target's id as its sequence position.
 *
 * @param jcas
 *            the JCas holding the targets and the {@link JCasId} annotation
 * @param supportSparseFeatures
 *            {@code true} to collect features through {@code getSparse}, {@code false} to
 *            collect them through {@code getDense}
 * @return the instances, in the iteration order of the selected targets
 * @throws TextClassificationException
 *             if a configured extractor does not implement {@link FeatureExtractor}, or if a
 *             called helper raises it
 */
public List<Instance> getUnitInstances(JCas jcas, boolean supportSparseFeatures) throws TextClassificationException {
    List<Instance> result = new ArrayList<>();
    int documentId = JCasUtil.selectSingle(jcas, JCasId.class).getId();
    Collection<TextClassificationTarget> units = JCasUtil.select(jcas, TextClassificationTarget.class);

    for (TextClassificationTarget unit : units) {
        Instance current = new Instance();

        if (addInstanceId) {
            current.addFeature(InstanceIdFeature.retrieve(jcas, unit));
        }

        for (FeatureExtractorResource_ImplBase extractor : featureExtractors) {
            boolean usable = extractor instanceof FeatureExtractor;
            if (!usable) {
                throw new TextClassificationException("Feature extractor does not implement interface [" + FeatureExtractor.class.getName() + "]: " + extractor.getResourceName());
            }

            if (!supportSparseFeatures) {
                current.addFeatures(getDense(jcas, unit, extractor));
            } else {
                current.addFeatures(getSparse(jcas, unit, extractor));
            }
        }

        // Attach outcome label(s), weight and bookkeeping ids once all features are collected.
        current.setOutcomes(getOutcomes(jcas, unit));
        current.setWeight(getWeight(jcas, unit));
        current.setJcasId(documentId);
        // NOTE(review): no sequence id is set here; only the position within the sequence is.
        current.setSequencePosition(unit.getId());

        result.add(current);
    }

    return result;
}
Use of org.dkpro.tc.api.features.FeatureExtractor in project dkpro-tc by dkpro.
Class InstanceExtractor, method getSparse.
/**
 * Runs a single feature extractor on the given target and keeps only the features that do not
 * report a default value.
 *
 * @param jcas
 *            the JCas passed on to the extractor
 * @param unit
 *            the target passed on to the extractor
 * @param featExt
 *            the extractor resource; it is cast to {@link FeatureExtractor}, so callers must
 *            have verified that it implements that interface
 * @return a new set holding every extracted feature whose {@code isDefaultValue()} is
 *         {@code false}
 * @throws TextClassificationException
 *             if the extractor raises it
 */
private Set<Feature> getSparse(JCas jcas, TextClassificationTarget unit, FeatureExtractorResource_ImplBase featExt) throws TextClassificationException {
    FeatureExtractor extractor = (FeatureExtractor) featExt;
    Set<Feature> extracted = extractor.extract(jcas, unit);

    Set<Feature> nonDefault = new HashSet<>();
    for (Feature candidate : extracted) {
        if (candidate.isDefaultValue()) {
            // Default-valued features are left out of the sparse result.
            continue;
        }
        nonDefault.add(candidate);
    }
    return nonDefault;
}
Use of org.dkpro.tc.api.features.FeatureExtractor in project dkpro-tc by dkpro.
Class InstanceExtractor, method getSingleInstanceDocument.
/**
 * Populates the given instance from a JCas that is treated as one single classification unit.
 *
 * <p>The document's {@link TextClassificationTarget} is looked up with
 * {@code JCasUtil.selectSingle}. The features of every configured extractor are added to the
 * instance, plus the instance-id feature when {@code addInstanceId} is set. Outcomes, weight
 * and the JCas id are then set exactly once, after all features have been collected. They are
 * therefore also set when no feature extractor is configured.
 *
 * @param instance
 *            the instance to fill; it is modified in place
 * @param jcas
 *            the JCas holding the target and the {@link JCasId} annotation
 * @param supportSparseFeatures
 *            {@code true} to collect features through {@code getSparse}, {@code false} to
 *            collect them through {@code getDense}
 * @return the same {@code instance} object that was passed in
 * @throws TextClassificationException
 *             if a configured extractor does not implement {@link FeatureExtractor}, or if a
 *             called helper raises it
 */
private Instance getSingleInstanceDocument(Instance instance, JCas jcas, boolean supportSparseFeatures) throws TextClassificationException {
    int jcasId = JCasUtil.selectSingle(jcas, JCasId.class).getId();
    TextClassificationTarget documentTcu = JCasUtil.selectSingle(jcas, TextClassificationTarget.class);

    if (addInstanceId) {
        instance.addFeature(InstanceIdFeature.retrieve(jcas));
    }

    for (FeatureExtractorResource_ImplBase featExt : featureExtractors) {
        if (!(featExt instanceof FeatureExtractor)) {
            throw new TextClassificationException("Using incompatible feature in document mode: " + featExt.getResourceName());
        }
        if (supportSparseFeatures) {
            instance.addFeatures(getSparse(jcas, documentTcu, featExt));
        } else {
            instance.addFeatures(getDense(jcas, documentTcu, featExt));
        }
    }

    // Outcome, weight and JCas id do not depend on the individual extractor, so they are set
    // once after the loop (as getUnitInstances does) instead of once per extractor. This also
    // guarantees they are set when the extractor list is empty.
    instance.setOutcomes(getOutcomes(jcas, null));
    instance.setWeight(getWeight(jcas, null));
    instance.setJcasId(jcasId);

    return instance;
}
Aggregations