use of org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase in project dkpro-tc by dkpro.
the class InstanceExtractor method getSingleInstanceDocument.
/**
 * Fills the given instance with the features, outcome(s), weight and JCas id
 * of the single classification target found in the JCas (document mode).
 *
 * @param instance
 *            the instance to populate; the same object is returned
 * @param jcas
 *            the JCas holding exactly one {@code JCasId} and exactly one
 *            {@code TextClassificationTarget} annotation
 * @param supportSparseFeatures
 *            if {@code true} features are collected via {@code getSparse},
 *            otherwise via {@code getDense}
 * @return the populated {@code instance}
 * @throws TextClassificationException
 *             if one of the configured extractors is not a
 *             {@code FeatureExtractor}
 */
private Instance getSingleInstanceDocument(Instance instance, JCas jcas, boolean supportSparseFeatures) throws TextClassificationException {
    int jcasId = JCasUtil.selectSingle(jcas, JCasId.class).getId();
    TextClassificationTarget documentTcu = JCasUtil.selectSingle(jcas, TextClassificationTarget.class);
    if (addInstanceId) {
        instance.addFeature(InstanceIdFeature.retrieve(jcas));
    }
    for (FeatureExtractorResource_ImplBase featExt : featureExtractors) {
        if (!(featExt instanceof FeatureExtractor)) {
            throw new TextClassificationException("Using incompatible feature in document mode: " + featExt.getResourceName());
        }
        if (supportSparseFeatures) {
            instance.addFeatures(getSparse(jcas, documentTcu, featExt));
        } else {
            instance.addFeatures(getDense(jcas, documentTcu, featExt));
        }
    }
    // Outcome, weight and JCas id do not depend on any extractor, so they are
    // set exactly once after the loop. Setting them inside the loop left them
    // unset whenever no feature extractor was configured.
    instance.setOutcomes(getOutcomes(jcas, null));
    instance.setWeight(getWeight(jcas, null));
    instance.setJcasId(jcasId);
    return instance;
}
use of org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase in project dkpro-tc by dkpro.
the class InstanceExtractor method getSequenceInstances.
/**
 * Builds one instance per classification target, walking every
 * {@code TextClassificationSequence} in the JCas and the targets it covers.
 * Targets are re-numbered from zero inside each sequence, and sequences are
 * numbered from zero in iteration order.
 *
 * @param jcas
 *            the JCas to read sequences, targets and the {@code JCasId} from
 * @param useSparse
 *            if {@code true} features are collected via {@code getSparse},
 *            otherwise via {@code getDense}
 * @return the instances of all sequences, in iteration order
 * @throws TextClassificationException
 *             declared by this method; the visible body does not throw it
 *             directly
 */
public List<Instance> getSequenceInstances(JCas jcas, boolean useSparse) throws TextClassificationException {
    List<Instance> result = new ArrayList<Instance>();
    int jcasId = JCasUtil.selectSingle(jcas, JCasId.class).getId();
    Collection<TextClassificationSequence> allSequences = JCasUtil.select(jcas, TextClassificationSequence.class);

    int sequenceId = 0;
    for (TextClassificationSequence sequence : allSequences) {
        // Position counter restarts for every sequence.
        int positionInSequence = 0;
        List<TextClassificationTarget> targets = JCasUtil.selectCovered(jcas, TextClassificationTarget.class, sequence);
        for (TextClassificationTarget target : targets) {
            target.setId(positionInSequence);
            positionInSequence++;

            Instance current = new Instance();
            if (addInstanceId) {
                current.addFeature(InstanceIdFeature.retrieve(jcas, target, sequenceId));
            }
            for (FeatureExtractorResource_ImplBase extractor : featureExtractors) {
                if (!useSparse) {
                    current.addFeatures(getDense(jcas, target, extractor));
                } else {
                    current.addFeatures(getSparse(jcas, target, extractor));
                }
            }

            // Outcome label(s), weight and bookkeeping ids of this target.
            current.setOutcomes(getOutcomes(jcas, target));
            current.setWeight(getWeight(jcas, target));
            current.setJcasId(jcasId);
            current.setSequenceId(sequenceId);
            current.setSequencePosition(target.getId());
            result.add(current);
        }
        sequenceId++;
    }
    return result;
}
use of org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase in project dkpro-tc by dkpro.
the class ValidityCheckConnector method verifyPairMode.
/**
 * Instantiates every named feature extractor class and runs the pair-mode
 * checks on it.
 *
 * @param featureExtractors
 *            fully qualified class names of the feature extractors to verify;
 *            each class is created through its no-argument constructor
 * @throws Exception
 *             if a class cannot be loaded or instantiated, or if one of the
 *             checks fails. An exception thrown by an extractor's constructor
 *             arrives wrapped in an
 *             {@link java.lang.reflect.InvocationTargetException}.
 */
private void verifyPairMode(String[] featureExtractors) throws Exception {
    for (String featExt : featureExtractors) {
        // Class.newInstance() is deprecated because it rethrows checked
        // constructor exceptions without declaring them; going through the
        // Constructor object wraps them properly instead.
        FeatureExtractorResource_ImplBase featExtC = (FeatureExtractorResource_ImplBase) Class.forName(featExt).getDeclaredConstructor().newInstance();
        checkIfIsPairFeatureExtractor(featExt, featExtC);
        checkErrorConditionImplementsConflictingFeatureExtractorInterfaces(featExt, featExtC);
    }
}
use of org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase in project dkpro-tc by dkpro.
the class TestTaskUtils method testInstanceMultiplicationWithUnitId.
/**
 * Extracts instances in sequence mode without any feature extractor and
 * checks, for each of the six expected instances, the value of its first
 * feature, its sequence id, its position within the sequence and its outcome.
 */
@Test
public void testInstanceMultiplicationWithUnitId() throws Exception {
    JCas jCas = initJCas(true);
    FeatureExtractorResource_ImplBase[] noExtractors = {};
    InstanceExtractor extractor = new InstanceExtractor(Constants.FM_SEQUENCE, noExtractors, true);
    List<Instance> extracted = extractor.getInstances(jCas, false);
    assertEquals(6, extracted.size());

    // Expected values, one column per instance in extraction order.
    String[] expectedFirstFeature = {
        "4711_0_0_a", "4711_0_1_car", "4711_0_2_drives",
        "4711_1_0_the", "4711_1_1_hedgehogs", "4711_1_2_dies"
    };
    int[] expectedSequenceId = { 0, 0, 0, 1, 1, 1 };
    int[] expectedPosition = { 0, 1, 2, 0, 1, 2 };
    String[] expectedOutcome = { "DT", "NN", "VBZ", "DT", "NN", "VBZ" };

    for (int i = 0; i < expectedFirstFeature.length; i++) {
        Instance current = extracted.get(i);
        assertEquals(expectedFirstFeature[i], current.getFeatures().iterator().next().getValue());
        assertEquals(expectedSequenceId[i], current.getSequenceId());
        assertEquals(expectedPosition[i], current.getSequencePosition());
        assertEquals(expectedOutcome[i], current.getOutcome());
    }
}
use of org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase in project dkpro-tc by dkpro.
the class TestTaskUtils method testUnitModeInstanceNumbering.
/**
 * Tests the numbering in unit mode, i.e. without sequences: extracts
 * instances without any feature extractor and checks that all six instances
 * report sequence id 0, consecutive positions 0..5, and the expected first
 * feature value and outcome.
 */
@Test
public void testUnitModeInstanceNumbering() throws Exception {
    JCas jCas = initJCas(true);
    FeatureExtractorResource_ImplBase[] noExtractors = {};
    InstanceExtractor extractor = new InstanceExtractor(Constants.FM_UNIT, noExtractors, true);
    List<Instance> extracted = extractor.getInstances(jCas, false);
    assertEquals(6, extracted.size());

    // Expected values, one column per instance in extraction order.
    String[] expectedFirstFeature = {
        "4711_0_a", "4711_1_car", "4711_2_drives",
        "4711_3_the", "4711_4_hedgehogs", "4711_5_dies"
    };
    String[] expectedOutcome = { "DT", "NN", "VBZ", "DT", "NN", "VBZ" };

    for (int i = 0; i < expectedFirstFeature.length; i++) {
        Instance current = extracted.get(i);
        assertEquals(expectedFirstFeature[i], current.getFeatures().iterator().next().getValue());
        // Unit mode: the sequence id stays 0 and the position equals the index.
        assertEquals(0, current.getSequenceId());
        assertEquals(i, current.getSequencePosition());
        assertEquals(expectedOutcome[i], current.getOutcome());
    }
}
Aggregations