Search in sources :

Example 21 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

the class IdentificationCollector method processSequenceMode.

/**
 * Writes one identification line for each classification target of every sequence
 * annotation of type {@code sequenceSpanType} found in the CAS.
 *
 * <p>Each line has the form {@code <id>\t<covered text>}, where the id is built from the
 * zero-padded CAS id, the running index of the sequence and the position of the target
 * inside that sequence. If {@code maximumLength} is set to a positive value, at most that
 * many targets are written per sequence. Every sequence is terminated by exactly one line
 * separator, whether or not it was truncated.
 *
 * @param aJCas the CAS whose sequences and targets are written
 * @throws AnalysisEngineProcessException if writing to the underlying writer fails
 */
private void processSequenceMode(JCas aJCas) throws AnalysisEngineProcessException {
    int jcasId = JCasUtil.selectSingle(aJCas, JCasId.class).getId();
    String lineBreak = System.lineSeparator();
    // The cap only applies when it is configured and positive.
    boolean capped = maximumLength != null && maximumLength > 0;
    try {
        int seqId = 0;
        Collection<AnnotationFS> sequences = CasUtil.select(aJCas.getCas(), sequenceSpanType);
        for (AnnotationFS s : sequences) {
            List<TextClassificationTarget> targets = new ArrayList<TextClassificationTarget>(JCasUtil.selectCovered(aJCas, TextClassificationTarget.class, s));
            for (int i = 0; i < targets.size(); i++) {
                TextClassificationTarget tco = targets.get(i);
                // This formatted identification will allow sorting the
                // information in sequence. This
                // leads to a human readable id2outcome report
                String identification = String.format("%06d_%06d_%06d", jcasId, seqId, i);
                writer.write(identification + "\t" + tco.getCoveredText());
                // The entry is the last one of this sequence either because the targets
                // are exhausted or because the length cap has been reached. In both cases
                // the separator is left to the single write after the loop; writing one
                // here as well would leave a blank line after truncated sequences.
                boolean lastWritten = i + 1 >= targets.size() || (capped && i + 1 >= maximumLength);
                if (lastWritten) {
                    break;
                }
                writer.write(lineBreak);
            }
            // Terminates the sequence; also emitted for a sequence without any targets.
            writer.write(lineBreak);
            seqId++;
        }
    } catch (IOException e) {
        throw new AnalysisEngineProcessException(e);
    }
}
Also used : AnnotationFS(org.apache.uima.cas.text.AnnotationFS) JCasId(org.dkpro.tc.api.type.JCasId) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) ArrayList(java.util.ArrayList) IOException(java.io.IOException) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException)

Example 22 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

the class InstanceExtractor method getSingleInstanceUnit.

/**
 * Fills the given instance from the single {@link TextClassificationTarget} in the CAS.
 *
 * <p>Optionally adds the instance id feature, then adds the features produced by every
 * configured feature extractor (sparse or dense, depending on the flag), and finally sets
 * outcomes, weight and CAS id. The outcome, weight and CAS id are set exactly once after
 * the extractor loop, so they are populated even when no feature extractor is configured.
 *
 * @param instance the instance to populate; the same object is returned
 * @param jcas the CAS holding exactly one classification target
 * @param supportsSparseFeature whether sparse feature extraction is used instead of dense
 * @return the populated {@code instance}
 * @throws Exception if feature, outcome or weight extraction fails
 */
private Instance getSingleInstanceUnit(Instance instance, JCas jcas, boolean supportsSparseFeature) throws Exception {
    int jcasId = JCasUtil.selectSingle(jcas, JCasId.class).getId();
    TextClassificationTarget unit = JCasUtil.selectSingle(jcas, TextClassificationTarget.class);
    if (addInstanceId) {
        instance.addFeature(InstanceIdFeature.retrieve(jcas, unit));
    }
    for (FeatureExtractorResource_ImplBase featExt : featureExtractors) {
        if (supportsSparseFeature) {
            instance.addFeatures(getSparse(jcas, unit, featExt));
        } else {
            instance.addFeatures(getDense(jcas, unit, featExt));
        }
    }
    // These values do not depend on the individual feature extractor, so they are set
    // once here rather than on every loop iteration.
    instance.setOutcomes(getOutcomes(jcas, unit));
    instance.setWeight(getWeight(jcas, unit));
    instance.setJcasId(jcasId);
    return instance;
}
Also used : JCasId(org.dkpro.tc.api.type.JCasId) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) FeatureExtractorResource_ImplBase(org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase)

Example 23 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

the class InstanceExtractor method getUnitInstances.

/**
 * Builds one {@link Instance} for every {@link TextClassificationTarget} in the CAS.
 *
 * <p>For each target the instance receives (optionally) the instance id feature, the
 * features of all configured feature extractors, the outcomes, the weight, the CAS id and
 * the target id as sequence position.
 *
 * @param jcas the CAS to read the targets from
 * @param supportSparseFeatures whether sparse feature extraction is used instead of dense
 * @return the instances, in the iteration order of the selected targets
 * @throws TextClassificationException if a configured extractor does not implement
 *         {@link FeatureExtractor}
 */
public List<Instance> getUnitInstances(JCas jcas, boolean supportSparseFeatures) throws TextClassificationException {
    List<Instance> result = new ArrayList<Instance>();
    int casId = JCasUtil.selectSingle(jcas, JCasId.class).getId();
    for (TextClassificationTarget target : JCasUtil.select(jcas, TextClassificationTarget.class)) {
        Instance current = new Instance();
        if (addInstanceId) {
            current.addFeature(InstanceIdFeature.retrieve(jcas, target));
        }
        for (FeatureExtractorResource_ImplBase extractor : featureExtractors) {
            // Only extractors implementing the FeatureExtractor interface are accepted here.
            boolean usable = extractor instanceof FeatureExtractor;
            if (!usable) {
                throw new TextClassificationException("Feature extractor does not implement interface [" + FeatureExtractor.class.getName() + "]: " + extractor.getResourceName());
            }
            if (!supportSparseFeatures) {
                current.addFeatures(getDense(jcas, target, extractor));
            } else {
                current.addFeatures(getSparse(jcas, target, extractor));
            }
        }
        // Outcome label(s), weight and bookkeeping ids are attached once per target.
        current.setOutcomes(getOutcomes(jcas, target));
        current.setWeight(getWeight(jcas, target));
        current.setJcasId(casId);
        current.setSequencePosition(target.getId());
        result.add(current);
    }
    return result;
}
Also used : JCasId(org.dkpro.tc.api.type.JCasId) FeatureExtractor(org.dkpro.tc.api.features.FeatureExtractor) PairFeatureExtractor(org.dkpro.tc.api.features.PairFeatureExtractor) Instance(org.dkpro.tc.api.features.Instance) TextClassificationException(org.dkpro.tc.api.exception.TextClassificationException) ArrayList(java.util.ArrayList) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) Feature(org.dkpro.tc.api.features.Feature) InstanceIdFeature(org.dkpro.tc.core.feature.InstanceIdFeature) FeatureExtractorResource_ImplBase(org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase)

Example 24 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

the class SequenceOutcomeAnnotator_ImplBase method process.

/**
 * Creates a {@link TextClassificationOutcome} annotation for every
 * {@link TextClassificationTarget} covered by the single
 * {@link TextClassificationSequence} in the CAS.
 *
 * <p>Each outcome spans the same offsets as its target; its label and weight come from
 * {@code getTextClassificationOutcome} and {@code getTextClassificationOutcomeWeight}.
 *
 * @param jcas the CAS to annotate
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    TextClassificationSequence sequence = JCasUtil.selectSingle(jcas, TextClassificationSequence.class);
    for (TextClassificationTarget target : JCasUtil.selectCovered(jcas, TextClassificationTarget.class, sequence)) {
        int begin = target.getBegin();
        int end = target.getEnd();
        TextClassificationOutcome annotation = new TextClassificationOutcome(jcas, begin, end);
        annotation.setOutcome(getTextClassificationOutcome(jcas, target));
        annotation.setWeight(getTextClassificationOutcomeWeight(jcas, target));
        annotation.addToIndexes();
    }
}
Also used : TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) TextClassificationSequence(org.dkpro.tc.api.type.TextClassificationSequence)

Example 25 with TextClassificationTarget

use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.

the class PairReader_ImplBase method createView.

/**
 * Creates a new view named after {@code part}, fills it with the given text and language,
 * indexes a {@link TextClassificationTarget} spanning the whole text, and creates the
 * view's metadata using the collection id of the base CAS.
 *
 * @param part name of the view to create
 * @param jCas the base CAS in which the view is created
 * @param language document language to set on the view
 * @param text document text to set on the view
 * @param docId document id passed on to {@code createMetaData}
 * @param docTitle document title passed on to {@code createMetaData}
 * @throws CASException if the view cannot be created
 */
protected void createView(String part, JCas jCas, String language, String text, String docId, String docTitle) throws CASException {
    String viewName = part.toString();
    JCas partView = jCas.createView(viewName);
    partView.setDocumentText(text);
    partView.setDocumentLanguage(language);
    // A single target covering the complete text of the view.
    new TextClassificationTarget(partView, 0, text.length()).addToIndexes();
    // The collection id is taken over from the metadata of the base CAS.
    createMetaData(partView, DocumentMetaData.get(jCas).getCollectionId(), docId, docTitle);
}
Also used : TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) JCas(org.apache.uima.jcas.JCas) DocumentMetaData(de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData)

Aggregations

TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget): 61; JCas (org.apache.uima.jcas.JCas): 29; ArrayList (java.util.ArrayList): 22; TextClassificationOutcome (org.dkpro.tc.api.type.TextClassificationOutcome): 18; Feature (org.dkpro.tc.api.features.Feature): 16; Test (org.junit.Test): 16; AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine): 12; TextClassificationSequence (org.dkpro.tc.api.type.TextClassificationSequence): 12; Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token): 11; JCasId (org.dkpro.tc.api.type.JCasId): 11; AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription): 8; AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException): 7; TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException): 7; FeatureTestUtil.assertFeature (org.dkpro.tc.testing.FeatureTestUtil.assertFeature): 6; CollectionReader (org.apache.uima.collection.CollectionReader): 5; FeatureExtractorResource_ImplBase (org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase): 5; DocumentMetaData (de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData): 4; Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence): 4; OpenNlpPosTagger (de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger): 4; BreakIteratorSegmenter (de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter): 4