Search in sources:

Example 6 with JCasId

use of org.dkpro.tc.api.type.JCasId in project dkpro-tc by dkpro.

In class InstanceIdFeatureTest, method instanceIdFeatureTest:

/**
 * Checks the identifier strings produced by the three {@code InstanceIdFeature.retrieve}
 * overloads for a CAS carrying a {@code JCasId} of 123 and one
 * {@code TextClassificationTarget} with id 0.
 */
@Test
public void instanceIdFeatureTest() throws Exception {
    AnalysisEngine engine = createEngine(NoOpAnnotator.class);
    JCas jcas = engine.newJCas();
    jcas.setDocumentLanguage("en");
    engine.process(jcas);

    // One target spanning offsets [0, 1) with id 0.
    TextClassificationTarget unit1 = new TextClassificationTarget(jcas, 0, 1);
    unit1.setId(0);
    unit1.addToIndexes();

    // The CAS-level id that prefixes every generated instance id.
    JCasId id = new JCasId(jcas);
    id.setId(123);
    id.addToIndexes();

    Feature feature = InstanceIdFeature.retrieve(jcas, unit1);
    Feature feature2 = InstanceIdFeature.retrieve(jcas);
    Feature feature3 = InstanceIdFeature.retrieve(jcas, unit1, 5);

    // assertEquals takes (expected, actual); keeping that order makes a failure
    // message report the expectation and the produced value the right way round.
    assertEquals("123_0", feature.getValue());
    assertEquals("123", feature2.getValue());
    // The extra int argument shows up between the CAS id and the target id.
    assertEquals("123_5_0", feature3.getValue());
}
Also used : JCasId(org.dkpro.tc.api.type.JCasId) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) JCas(org.apache.uima.jcas.JCas) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Example 7 with JCasId

use of org.dkpro.tc.api.type.JCasId in project dkpro-tc by dkpro.

In class IdentificationCollector, method processSequenceMode:

/**
 * Writes one line per classification target, grouped by sequence annotation.
 * <p>
 * Each line is {@code <jcasId>_<sequenceIndex>_<positionInSequence>}, every part zero-padded
 * to six digits, followed by a tab and the covered text of the target. The zero padding
 * keeps the ids sortable as plain strings. A line separator is written after every sequence.
 *
 * @param aJCas the CAS whose sequences and targets are written out
 * @throws AnalysisEngineProcessException if writing fails with an {@link IOException}
 */
private void processSequenceMode(JCas aJCas) throws AnalysisEngineProcessException {
    int jcasId = JCasUtil.selectSingle(aJCas, JCasId.class).getId();
    try {
        int seqId = 0;
        for (AnnotationFS span : CasUtil.select(aJCas.getCas(), sequenceSpanType)) {
            List<TextClassificationTarget> covered = new ArrayList<TextClassificationTarget>(
                    JCasUtil.selectCovered(aJCas, TextClassificationTarget.class, span));
            int count = covered.size();
            int pos = 0;
            while (pos < count) {
                String id = String.format("%06d_%06d_%06d", jcasId, seqId, pos);
                writer.write(id + "\t" + covered.get(pos).getCoveredText());
                pos++;
                // Separate entries inside a sequence; the final separator comes after the loop.
                if (pos < count) {
                    writer.write(System.lineSeparator());
                }
                // Stop early once the configured cut-off is reached.
                // NOTE(review): when this fires before the last target, a separator was
                // already written above, so the sequence ends with two separators -
                // confirm whether readers of this file expect that blank line.
                boolean limitReached = maximumLength != null && maximumLength > 0 && pos >= maximumLength;
                if (limitReached) {
                    break;
                }
            }
            writer.write(System.lineSeparator());
            seqId++;
        }
    } catch (IOException e) {
        throw new AnalysisEngineProcessException(e);
    }
}
Also used : AnnotationFS(org.apache.uima.cas.text.AnnotationFS) JCasId(org.dkpro.tc.api.type.JCasId) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) ArrayList(java.util.ArrayList) IOException(java.io.IOException) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException)

Example 8 with JCasId

use of org.dkpro.tc.api.type.JCasId in project dkpro-tc by dkpro.

In class InstanceExtractor, method getSingleInstanceUnit:

/**
 * Populates the given instance from the single {@code TextClassificationTarget} found in
 * the CAS: an optional instance-id feature, the features of every configured extractor,
 * and the outcomes, weight and JCas id.
 *
 * @param instance the instance to fill; the same object is returned
 * @param jcas the CAS holding the {@code JCasId} and the target
 * @param supportsSparseFeature {@code true} to collect features via {@code getSparse},
 *        {@code false} to collect them via {@code getDense}
 * @return the populated {@code instance}
 * @throws Exception if a lookup or feature extraction fails
 */
private Instance getSingleInstanceUnit(Instance instance, JCas jcas, boolean supportsSparseFeature) throws Exception {
    int jcasId = JCasUtil.selectSingle(jcas, JCasId.class).getId();
    TextClassificationTarget unit = JCasUtil.selectSingle(jcas, TextClassificationTarget.class);
    if (addInstanceId) {
        instance.addFeature(InstanceIdFeature.retrieve(jcas, unit));
    }
    for (FeatureExtractorResource_ImplBase featExt : featureExtractors) {
        if (supportsSparseFeature) {
            instance.addFeatures(getSparse(jcas, unit, featExt));
        } else {
            instance.addFeatures(getDense(jcas, unit, featExt));
        }
    }
    // Outcomes, weight and JCas id do not depend on the extractor, so they are set once
    // after the loop. Setting them inside the loop recomputed them per extractor and left
    // them unset when no extractors were configured.
    instance.setOutcomes(getOutcomes(jcas, unit));
    instance.setWeight(getWeight(jcas, unit));
    instance.setJcasId(jcasId);
    return instance;
}
Also used : JCasId(org.dkpro.tc.api.type.JCasId) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) FeatureExtractorResource_ImplBase(org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase)

Example 9 with JCasId

use of org.dkpro.tc.api.type.JCasId in project dkpro-tc by dkpro.

In class InstanceExtractor, method getUnitInstances:

/**
 * Builds one {@code Instance} per {@code TextClassificationTarget} in the CAS.
 * <p>
 * Every instance receives an optional instance-id feature, the features of all configured
 * extractors, its outcomes and weight, the JCas id, and the target's id as its sequence
 * position. No sequence id is set here.
 *
 * @param jcas the CAS holding the {@code JCasId} and the targets
 * @param supportSparseFeatures {@code true} to collect features via {@code getSparse},
 *        {@code false} to collect them via {@code getDense}
 * @return the instances in the iteration order of the selected targets
 * @throws TextClassificationException if a configured extractor is not a
 *         {@code FeatureExtractor}
 */
public List<Instance> getUnitInstances(JCas jcas, boolean supportSparseFeatures) throws TextClassificationException {
    List<Instance> result = new ArrayList<Instance>();
    int casId = JCasUtil.selectSingle(jcas, JCasId.class).getId();
    for (TextClassificationTarget target : JCasUtil.select(jcas, TextClassificationTarget.class)) {
        Instance current = new Instance();
        if (addInstanceId) {
            current.addFeature(InstanceIdFeature.retrieve(jcas, target));
        }
        for (FeatureExtractorResource_ImplBase extractor : featureExtractors) {
            boolean isUnitExtractor = extractor instanceof FeatureExtractor;
            if (!isUnitExtractor) {
                String required = FeatureExtractor.class.getName();
                throw new TextClassificationException("Feature extractor does not implement interface [" + required + "]: " + extractor.getResourceName());
            }
            if (!supportSparseFeatures) {
                current.addFeatures(getDense(jcas, target, extractor));
            } else {
                current.addFeatures(getSparse(jcas, target, extractor));
            }
        }
        // Per-target metadata: label(s), weight, owning CAS and position.
        current.setOutcomes(getOutcomes(jcas, target));
        current.setWeight(getWeight(jcas, target));
        current.setJcasId(casId);
        current.setSequencePosition(target.getId());
        result.add(current);
    }
    return result;
}
Also used : JCasId(org.dkpro.tc.api.type.JCasId) FeatureExtractor(org.dkpro.tc.api.features.FeatureExtractor) PairFeatureExtractor(org.dkpro.tc.api.features.PairFeatureExtractor) Instance(org.dkpro.tc.api.features.Instance) TextClassificationException(org.dkpro.tc.api.exception.TextClassificationException) ArrayList(java.util.ArrayList) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) Feature(org.dkpro.tc.api.features.Feature) InstanceIdFeature(org.dkpro.tc.core.feature.InstanceIdFeature) FeatureExtractorResource_ImplBase(org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase)

Example 10 with JCasId

use of org.dkpro.tc.api.type.JCasId in project dkpro-tc by dkpro.

In class InstanceExtractor, method getSingleInstancePair:

/**
 * Populates the given instance for pair mode: an optional instance-id feature, the
 * features every configured pair extractor derives from the {@code Constants.PART_ONE}
 * and {@code Constants.PART_TWO} views, and the outcomes, weight and JCas id.
 *
 * @param instance the instance to fill; the same object is returned
 * @param jcas the CAS holding the {@code JCasId} and the two views
 * @return the populated {@code instance}
 * @throws TextClassificationException if a configured extractor is not a
 *         {@code PairFeatureExtractor}, or if a view lookup fails with a
 *         {@code CASException}
 */
private Instance getSingleInstancePair(Instance instance, JCas jcas) throws TextClassificationException {
    try {
        int jcasId = JCasUtil.selectSingle(jcas, JCasId.class).getId();
        if (addInstanceId) {
            instance.addFeature(InstanceIdFeature.retrieve(jcas));
        }
        for (FeatureExtractorResource_ImplBase featExt : featureExtractors) {
            if (!(featExt instanceof PairFeatureExtractor)) {
                throw new TextClassificationException("Using non-pair FE in pair mode: " + featExt.getResourceName());
            }
            // Views are looked up after the type check so that a misconfigured extractor
            // is still reported before any view-related failure.
            JCas view1 = jcas.getView(Constants.PART_ONE);
            JCas view2 = jcas.getView(Constants.PART_TWO);
            instance.addFeatures(((PairFeatureExtractor) featExt).extract(view1, view2));
        }
        // Outcomes, weight and JCas id do not depend on the extractor, so they are set
        // once after the loop. Setting them inside the loop recomputed them per extractor
        // and left them unset when no extractors were configured.
        instance.setOutcomes(getOutcomes(jcas, null));
        instance.setWeight(getWeight(jcas, null));
        instance.setJcasId(jcasId);
    } catch (CASException e) {
        throw new TextClassificationException(e);
    }
    return instance;
}
Also used : JCasId(org.dkpro.tc.api.type.JCasId) PairFeatureExtractor(org.dkpro.tc.api.features.PairFeatureExtractor) TextClassificationException(org.dkpro.tc.api.exception.TextClassificationException) JCas(org.apache.uima.jcas.JCas) CASException(org.apache.uima.cas.CASException) FeatureExtractorResource_ImplBase(org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase)

Aggregations

JCasId (org.dkpro.tc.api.type.JCasId): 24; TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget): 11; JCas (org.apache.uima.jcas.JCas): 9; TextClassificationOutcome (org.dkpro.tc.api.type.TextClassificationOutcome): 8; CASException (org.apache.uima.cas.CASException): 6; FeatureExtractorResource_ImplBase (org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase): 6; CollectionException (org.apache.uima.collection.CollectionException): 5; ArrayList (java.util.ArrayList): 4; TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException): 4; DocumentMetaData (de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData): 3; AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine): 3; AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException): 3; Instance (org.dkpro.tc.api.features.Instance): 3; PairFeatureExtractor (org.dkpro.tc.api.features.PairFeatureExtractor): 3; IOException (java.io.IOException): 2; Feature (org.dkpro.tc.api.features.Feature): 2; FeatureExtractor (org.dkpro.tc.api.features.FeatureExtractor): 2; TextClassificationSequence (org.dkpro.tc.api.type.TextClassificationSequence): 2; AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription): 1; AnnotationFS (org.apache.uima.cas.text.AnnotationFS): 1