use of org.dkpro.tc.api.type.JCasId in project dkpro-tc by dkpro.
the class InstanceIdFeatureTest method instanceIdFeatureTest.
/**
 * Checks the instance-id strings produced by the three
 * {@code InstanceIdFeature.retrieve} overloads for a CAS that holds a
 * {@code JCasId} of 123 and a single {@code TextClassificationTarget} with
 * id 0.
 *
 * @throws Exception
 *             if the engine cannot be created or fails while processing
 */
@Test
public void instanceIdFeatureTest() throws Exception {
    AnalysisEngine engine = createEngine(NoOpAnnotator.class);
    JCas jcas = engine.newJCas();
    jcas.setDocumentLanguage("en");
    engine.process(jcas);

    // One target with id 0 spanning offsets 0..1.
    TextClassificationTarget unit1 = new TextClassificationTarget(jcas, 0, 1);
    unit1.setId(0);
    unit1.addToIndexes();

    // The CAS-level id that prefixes every generated instance id.
    JCasId id = new JCasId(jcas);
    id.setId(123);
    id.addToIndexes();

    Feature feature = InstanceIdFeature.retrieve(jcas, unit1);
    Feature feature2 = InstanceIdFeature.retrieve(jcas);
    Feature feature3 = InstanceIdFeature.retrieve(jcas, unit1, 5);

    // JUnit expects (expected, actual); keeping that order makes failure
    // messages report the values the right way round.
    assertEquals("123_0", feature.getValue());
    assertEquals("123", feature2.getValue());
    assertEquals("123_5_0", feature3.getValue());
}
use of org.dkpro.tc.api.type.JCasId in project dkpro-tc by dkpro.
the class IdentificationCollector method processSequenceMode.
/**
 * Writes one identification line per {@code TextClassificationTarget} covered
 * by each annotation of type {@code sequenceSpanType} in the given CAS.
 * <p>
 * Each line consists of a zero-padded id built from the JCas id, a running
 * sequence counter and the target's index inside its sequence, followed by a
 * tab and the target's covered text.
 *
 * @param aJCas
 *            the CAS whose sequences and targets are written out
 * @throws AnalysisEngineProcessException
 *             if writing to the underlying writer fails
 */
private void processSequenceMode(JCas aJCas) throws AnalysisEngineProcessException {
    final int jcasId = JCasUtil.selectSingle(aJCas, JCasId.class).getId();
    final String newline = System.lineSeparator();

    try {
        int sequenceIndex = 0;
        for (AnnotationFS sequence : CasUtil.select(aJCas.getCas(), sequenceSpanType)) {
            List<TextClassificationTarget> covered = new ArrayList<TextClassificationTarget>(
                    JCasUtil.selectCovered(aJCas, TextClassificationTarget.class, sequence));
            final int total = covered.size();

            int position = 0;
            for (TextClassificationTarget target : covered) {
                // Fixed-width, zero-padded components so that the ids sort in
                // sequence order, which keeps the id2outcome report human readable.
                String identification = String.format("%06d_%06d_%06d", jcasId, sequenceIndex,
                        position);
                writer.write(identification + "\t" + target.getCoveredText());
                position++;

                // Separate this target from the next one in the same sequence.
                if (position < total) {
                    writer.write(newline);
                }

                // NOTE(review): when this limit cuts a sequence short, the separator
                // above has already been written and another one follows the loop,
                // so two line separators are emitted in a row - confirm this is intended.
                boolean limitReached = maximumLength != null && maximumLength > 0
                        && position >= maximumLength;
                if (limitReached) {
                    break;
                }
            }

            // Terminates the sequence; also written for sequences without targets.
            writer.write(newline);
            sequenceIndex++;
        }
    } catch (IOException e) {
        throw new AnalysisEngineProcessException(e);
    }
}
use of org.dkpro.tc.api.type.JCasId in project dkpro-tc by dkpro.
the class InstanceExtractor method getSingleInstanceUnit.
/**
 * Fills the given instance from the single {@code TextClassificationTarget}
 * selected from the CAS: optionally the instance-id feature, then the features
 * of every configured extractor, and finally outcomes, weight and JCas id.
 *
 * @param instance
 *            the instance to populate; the same object is returned
 * @param jcas
 *            the CAS to read the {@code JCasId} and the target from
 * @param supportsSparseFeature
 *            if {@code true} features are collected via {@code getSparse},
 *            otherwise via {@code getDense}
 * @return the populated {@code instance}
 * @throws Exception
 *             if feature, outcome or weight extraction fails
 */
private Instance getSingleInstanceUnit(Instance instance, JCas jcas, boolean supportsSparseFeature) throws Exception {
    int jcasId = JCasUtil.selectSingle(jcas, JCasId.class).getId();
    TextClassificationTarget unit = JCasUtil.selectSingle(jcas, TextClassificationTarget.class);

    if (addInstanceId) {
        instance.addFeature(InstanceIdFeature.retrieve(jcas, unit));
    }

    for (FeatureExtractorResource_ImplBase featExt : featureExtractors) {
        if (supportsSparseFeature) {
            instance.addFeatures(getSparse(jcas, unit, featExt));
        } else {
            instance.addFeatures(getDense(jcas, unit, featExt));
        }
    }

    // Outcomes, weight and JCas id do not depend on the feature extractor, so
    // they are set once after the loop (as getUnitInstances does). Previously
    // they were assigned inside the loop and therefore stayed unset when no
    // feature extractor was configured.
    instance.setOutcomes(getOutcomes(jcas, unit));
    instance.setWeight(getWeight(jcas, unit));
    instance.setJcasId(jcasId);
    return instance;
}
use of org.dkpro.tc.api.type.JCasId in project dkpro-tc by dkpro.
the class InstanceExtractor method getUnitInstances.
/**
 * Builds one {@code Instance} per {@code TextClassificationTarget} found in
 * the CAS.
 * <p>
 * Every instance receives (optionally) the instance-id feature, the features
 * of all configured extractors, the target's outcomes and weight, the JCas id
 * and the target's id as its sequence position.
 *
 * @param jcas
 *            the CAS to read the {@code JCasId} and the targets from
 * @param supportSparseFeatures
 *            if {@code true} features are collected via {@code getSparse},
 *            otherwise via {@code getDense}
 * @return the instances in the iteration order of the selected targets
 * @throws TextClassificationException
 *             if a configured extractor is not a {@code FeatureExtractor}
 */
public List<Instance> getUnitInstances(JCas jcas, boolean supportSparseFeatures) throws TextClassificationException {
    List<Instance> result = new ArrayList<Instance>();
    final int documentId = JCasUtil.selectSingle(jcas, JCasId.class).getId();

    for (TextClassificationTarget target : JCasUtil.select(jcas, TextClassificationTarget.class)) {
        Instance current = new Instance();

        if (addInstanceId) {
            current.addFeature(InstanceIdFeature.retrieve(jcas, target));
        }

        for (FeatureExtractorResource_ImplBase extractor : featureExtractors) {
            // Unit mode only accepts extractors implementing FeatureExtractor.
            boolean isUnitExtractor = extractor instanceof FeatureExtractor;
            if (!isUnitExtractor) {
                throw new TextClassificationException("Feature extractor does not implement interface [" + FeatureExtractor.class.getName() + "]: " + extractor.getResourceName());
            }

            if (!supportSparseFeatures) {
                current.addFeatures(getDense(jcas, target, extractor));
            } else {
                current.addFeatures(getSparse(jcas, target, extractor));
            }
        }

        // Label(s) and weight of this target, followed by its bookkeeping ids.
        current.setOutcomes(getOutcomes(jcas, target));
        current.setWeight(getWeight(jcas, target));
        current.setJcasId(documentId);
        // No sequence id is assigned here; the target's id is used as the
        // sequence position.
        current.setSequencePosition(target.getId());

        result.add(current);
    }

    return result;
}
use of org.dkpro.tc.api.type.JCasId in project dkpro-tc by dkpro.
the class InstanceExtractor method getSingleInstancePair.
/**
 * Fills the given instance for pair mode: optionally the instance-id feature,
 * then the features every configured pair extractor derives from the
 * {@code Constants.PART_ONE} and {@code Constants.PART_TWO} views, and finally
 * outcomes, weight and JCas id.
 *
 * @param instance
 *            the instance to populate; the same object is returned
 * @param jcas
 *            the CAS holding the {@code JCasId} and both views
 * @return the populated {@code instance}
 * @throws TextClassificationException
 *             if a configured extractor is not a {@code PairFeatureExtractor}
 *             or a view cannot be obtained from the CAS
 */
private Instance getSingleInstancePair(Instance instance, JCas jcas) throws TextClassificationException {
    try {
        int jcasId = JCasUtil.selectSingle(jcas, JCasId.class).getId();

        if (addInstanceId) {
            instance.addFeature(InstanceIdFeature.retrieve(jcas));
        }

        for (FeatureExtractorResource_ImplBase featExt : featureExtractors) {
            if (!(featExt instanceof PairFeatureExtractor)) {
                throw new TextClassificationException("Using non-pair FE in pair mode: " + featExt.getResourceName());
            }
            JCas view1 = jcas.getView(Constants.PART_ONE);
            JCas view2 = jcas.getView(Constants.PART_TWO);
            instance.addFeatures(((PairFeatureExtractor) featExt).extract(view1, view2));
        }

        // Outcomes, weight and JCas id do not depend on the feature extractor,
        // so they are set once after the loop. Previously they were assigned
        // inside the loop and therefore stayed unset when no feature extractor
        // was configured. Pair mode has no target, hence the null argument.
        instance.setOutcomes(getOutcomes(jcas, null));
        instance.setWeight(getWeight(jcas, null));
        instance.setJcasId(jcasId);
    } catch (CASException e) {
        throw new TextClassificationException(e);
    }
    return instance;
}
Aggregations