Search in sources :

Example 36 with Sentence

use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence in project webanno by webanno.

the class SuggestionBuilder method updateCrossSentAnnoList.

/**
 * Rebuilds the {@code crossSentenceLists} field: for every segment start offset in
 * {@code aSegmentBeginEnd} it collects the numbers of the sentences that are connected to that
 * segment by an annotation which begins inside the segment and ends outside of it, or the other
 * way round.
 * <p>
 * Side effects: {@code crossSentenceLists} is replaced by a fresh map, and the end offset stored
 * in {@code aSegmentBeginEnd} for a segment is overwritten with the end of the sentence found
 * via {@code getSentenceByAnnoEnd} whenever an annotation starts at or before the segment end
 * but ends after it.
 *
 * @param aSegmentBeginEnd
 *            segment begin offset mapped to segment end offset; modified in place
 * @param aSegmentNumber
 *            segment begin offset mapped to the sentence number used as result key
 * @param aJCases
 *            the CASes to inspect; only the map values are used
 * @param aEntryTypes
 *            the annotation types to inspect
 */
private void updateCrossSentAnnoList(Map<Integer, Integer> aSegmentBeginEnd, Map<Integer, Integer> aSegmentNumber, Map<String, JCas> aJCases, List<Type> aEntryTypes) {
    // FIXME Remove this side-effect and instead return this hashmap
    crossSentenceLists = new HashMap<>();

    // Materialize the sentences of each CAS once so they can be searched by position later on
    Map<JCas, List<Sentence>> sentencesByCas = new HashMap<>();
    for (JCas jcas : aJCases.values()) {
        sentencesByCas.put(jcas, new ArrayList<>(select(jcas, Sentence.class)));
    }

    Set<Integer> segmentBegins = aSegmentBeginEnd.keySet();
    int processed = 0;
    for (int sentBegin : segmentBegins) {
        processed++;
        // Progress output for every 100th segment only
        if (processed % 100 == 0) {
            log.debug("Updating cross-sentence annoations: {} of {} sentences...", processed, segmentBegins.size());
        }

        // The end offset is read once up-front; updates made to the map further down do not
        // affect the boundary checks of the current segment
        int sentEnd = aSegmentBeginEnd.get(sentBegin);
        // -1 marks "not looked up yet"; it stays -1 if there is no type/CAS combination at all
        int sentenceNumber = -1;
        Set<Integer> linkedSentences = new HashSet<>();

        for (Type type : aEntryTypes) {
            for (JCas jcas : aJCases.values()) {
                // Looked up lazily on the first type/CAS combination - same for all others anyway.
                if (sentenceNumber == -1) {
                    sentenceNumber = aSegmentNumber.get(sentBegin);
                }

                List<Sentence> sentences = sentencesByCas.get(jcas);

                // update cross-sentence annotation lists
                for (AnnotationFS anno : selectCovered(jcas.getCas(), type, diffRangeBegin, diffRangeEnd)) {
                    int annoBegin = anno.getBegin();
                    int annoEnd = anno.getEnd();
                    boolean beginsHere = sentBegin <= annoBegin && annoBegin <= sentEnd;
                    boolean endsHere = sentBegin <= annoEnd && annoEnd <= sentEnd;

                    if (beginsHere && !endsHere) {
                        // CASE 1. annotation begins here but ends elsewhere: record the
                        // 1-based list position of the sentence found for its end offset
                        Sentence endSentence = getSentenceByAnnoEnd(sentences, annoEnd);
                        int thatSent = sentences.indexOf(endSentence) + 1;
                        linkedSentences.add(thatSent);
                    } else if (endsHere && !beginsHere) {
                        // CASE 2. annotation ends here but begins elsewhere
                        int thatSent = WebAnnoCasUtil.getSentenceNumber(jcas, annoBegin);
                        linkedSentences.add(thatSent);
                    }
                }

                // Stretch the segment so that it reaches the end of the sentence found for any
                // annotation running over the original segment end
                for (AnnotationFS anno : selectCovered(jcas.getCas(), type, sentBegin, diffRangeEnd)) {
                    boolean runsOverSegmentEnd = anno.getBegin() <= sentEnd && anno.getEnd() > sentEnd;
                    if (runsOverSegmentEnd) {
                        Sentence endSentence = getSentenceByAnnoEnd(sentences, anno.getEnd());
                        aSegmentBeginEnd.put(sentBegin, endSentence.getEnd());
                    }
                }
            }
        }

        crossSentenceLists.put(sentenceNumber, linkedSentences);
    }
}
Also used : HashMap(java.util.HashMap) JCas(org.apache.uima.jcas.JCas) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) HashSet(java.util.HashSet)

Example 37 with Sentence

use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence in project webanno by webanno.

the class AutomationUtil method addOtherFeatureFromAnnotation.

/**
 * If the training file or the test file already contain the "Other layer" annotations, get the
 * UIMA annotation and add it as a feature - no need to train and predict for this "other layer".
 * <p>
 * When {@code aSourceDocument} is {@code null} (training), the annotations of all training
 * documents of the feature's project are gathered into one list. Otherwise (prediction), the
 * annotations are read from the current user's annotation document for {@code aSourceDocument}.
 * In both cases exactly one list is appended to {@code aPredictions}.
 *
 * @param aFeature
 *            the feature whose existing annotation values are collected
 * @param aRepository
 *            used to obtain and read the annotation document in the prediction case
 * @param aAutomationServic
 *            used to list and read the training documents in the training case
 * @param aAnnotationService
 *            used to obtain the adapter for the feature's layer
 * @param aUserDao
 *            used to determine the current user in the prediction case
 * @param aPredictions
 *            receives the collected annotation list as a new element
 * @param aSourceDocument
 *            the document to predict for, or {@code null} when training
 */
private static void addOtherFeatureFromAnnotation(AnnotationFeature aFeature, DocumentService aRepository, AutomationService aAutomationServic, AnnotationSchemaService aAnnotationService, UserDao aUserDao, List<List<String>> aPredictions, SourceDocument aSourceDocument) throws UIMAException, ClassNotFoundException, IOException {
    AutomationTypeAdapter adapter = (AutomationTypeAdapter) aAnnotationService.getAdapter(aFeature.getLayer());
    List<String> annotations = new ArrayList<>();
    if (aSourceDocument == null) {
        // this is training - all training documents will be converted to a single training file
        for (TrainingDocument trainingDocument : aAutomationServic.listTrainingDocuments(aFeature.getProject())) {
            JCas jCas = aAutomationServic.readTrainingAnnotationCas(trainingDocument);
            collectSentenceAnnotations(jCas, adapter, aFeature, annotations);
        }
    } else {
        // This is SourceDocument to predict (in the suggestion pane)
        User user = aUserDao.getCurrentUser();
        AnnotationDocument annodoc = aRepository.createOrGetAnnotationDocument(aSourceDocument, user);
        JCas jCas = aRepository.readAnnotationCas(annodoc);
        collectSentenceAnnotations(jCas, adapter, aFeature, annotations);
    }
    // Reached only if nothing above threw, so a partially filled list is never published
    aPredictions.add(annotations);
}

/**
 * Appends the annotation values of {@code aFeature} for every sentence of {@code aJCas} to
 * {@code aAnnotations}, using the multi-token lookup of the span adapter when the feature's
 * layer is flagged as spanning multiple tokens.
 */
private static void collectSentenceAnnotations(JCas aJCas, AutomationTypeAdapter aAdapter, AnnotationFeature aFeature, List<String> aAnnotations) throws UIMAException, ClassNotFoundException, IOException {
    for (Sentence sentence : select(aJCas, Sentence.class)) {
        if (aFeature.getLayer().isMultipleTokens()) {
            aAnnotations.addAll(((SpanAdapter) aAdapter).getMultipleAnnotation(sentence, aFeature).values());
        } else {
            aAnnotations.addAll(aAdapter.getAnnotation(sentence, aFeature));
        }
    }
}
Also used : User(de.tudarmstadt.ukp.clarin.webanno.security.model.User) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) SpanAdapter(de.tudarmstadt.ukp.clarin.webanno.api.annotation.adapter.SpanAdapter) AnnotationDocument(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationDocument) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) AutomationTypeAdapter(de.tudarmstadt.ukp.clarin.webanno.api.annotation.adapter.AutomationTypeAdapter) TrainingDocument(de.tudarmstadt.ukp.clarin.webanno.model.TrainingDocument)

Example 38 with Sentence

use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence in project webanno by webanno.

the class AutomationUtil method repeateRelationAnnotation.

/**
 * Walks over all source documents of the project in {@code aState} and, for each of them, loads
 * the document, reads its correction CAS, delegates to {@code repeatRelation} and writes the
 * correction CAS back.
 * <p>
 * The two end points handed to {@code repeatRelation} are read from {@code fs} through the
 * adapter's source and target features; if the adapter names an attach feature, they are
 * resolved one step further through that feature. Depending on
 * {@code adapter.isCrossMultipleSentence()}, {@code repeatRelation} is invoked either once for
 * the whole document or once per sentence.
 *
 * @param aState
 *            supplies the project and the user
 * @param aDocumentService
 *            used to list the source documents and to load each document
 * @param aCorrectionDocumentService
 *            used to read and write the correction CAS
 * @param aAnnotationService
 *            used to obtain the adapter for the feature's layer
 * @param fs
 *            the relation annotation whose end points are looked up
 * @param aFeature
 *            the feature passed on to {@code repeatRelation}
 * @param aValue
 *            the value passed on to {@code repeatRelation}
 */
public static void repeateRelationAnnotation(AnnotatorState aState, DocumentService aDocumentService, CorrectionDocumentService aCorrectionDocumentService, AnnotationSchemaService aAnnotationService, AnnotationFS fs, AnnotationFeature aFeature, String aValue) throws UIMAException, ClassNotFoundException, IOException, AnnotationException {
    for (SourceDocument document : aDocumentService.listSourceDocuments(aState.getProject())) {
        loadDocument(document, aAnnotationService, aDocumentService, aCorrectionDocumentService, aState.getUser());
        JCas correctionCas = aCorrectionDocumentService.readCorrectionCas(document);

        // Types and features are looked up against the CAS of the current document
        ArcAdapter arcAdapter = (ArcAdapter) aAnnotationService.getAdapter(aFeature.getLayer());
        String governorFeatureName = arcAdapter.getSourceFeatureName();
        String dependentFeatureName = arcAdapter.getTargetFeatureName();
        Type relationType = getType(correctionCas.getCas(), aFeature.getLayer().getName());
        Type attachType = getType(correctionCas.getCas(), arcAdapter.getAttachTypeName());
        Feature attachFeature = attachType.getFeatureByBaseName(arcAdapter.getAttachFeatureName());
        Feature dependentFeature = relationType.getFeatureByBaseName(dependentFeatureName);
        Feature governorFeature = relationType.getFeatureByBaseName(governorFeatureName);

        AnnotationFS dependentFs;
        AnnotationFS governorFs;
        if (arcAdapter.getAttachFeatureName() == null) {
            // The relation end points are used directly
            dependentFs = (AnnotationFS) fs.getFeatureValue(dependentFeature);
            governorFs = (AnnotationFS) fs.getFeatureValue(governorFeature);
        } else {
            // The relation end points are dereferenced once more through the attach feature
            dependentFs = (AnnotationFS) fs.getFeatureValue(dependentFeature).getFeatureValue(attachFeature);
            governorFs = (AnnotationFS) fs.getFeatureValue(governorFeature).getFeatureValue(attachFeature);
        }

        if (!arcAdapter.isCrossMultipleSentence()) {
            // One pass per sentence, restricted to the spans covered by that sentence
            for (Sentence sentence : select(correctionCas, Sentence.class)) {
                List<AnnotationFS> coveredSpans = selectCovered(governorFs.getType(), sentence);
                repeatRelation(aState, sentence.getBegin(), sentence.getEnd(), aFeature, aValue, correctionCas, arcAdapter, dependentFs, governorFs, coveredSpans);
            }
        } else {
            // A single pass over the whole document using all annotations of the governor's type
            List<AnnotationFS> allSpans = new ArrayList<>(getAllAnnoFss(correctionCas, governorFs.getType()));
            int lastOffset = correctionCas.getDocumentText().length() - 1;
            repeatRelation(aState, 0, lastOffset, aFeature, aValue, correctionCas, arcAdapter, dependentFs, governorFs, allSpans);
        }

        aCorrectionDocumentService.writeCorrectionCas(correctionCas, document);
    }
}
Also used : AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) CasUtil.getType(org.apache.uima.fit.util.CasUtil.getType) ArcAdapter(de.tudarmstadt.ukp.clarin.webanno.api.annotation.adapter.ArcAdapter) SourceDocument(de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) Feature(org.apache.uima.cas.Feature) AnnotationFeature(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)

Example 39 with Sentence

use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence in project webanno by webanno.

the class CorrectionPage method actionCompletePreferencesChange.

/**
 * Handles a completed change of the preferences: refreshes the first visible unit in the
 * annotator state, rebuilds the curation container, updates the page and triggers a browser
 * reload. Any exception raised on the way is passed to {@code handleException}.
 *
 * @param aTarget
 *            the AJAX request target that receives the updates and the reload script
 */
private void actionCompletePreferencesChange(AjaxRequestTarget aTarget) {
    try {
        AnnotatorState annotatorState = getModelObject();
        JCas cas = getEditorCas();

        // The number of visible sentences may have changed, so the first visible sentence is
        // resolved again from its address and handed back to the state, which can then
        // recalculate the visible sentences
        annotatorState.setFirstVisibleUnit(selectByAddr(cas, Sentence.class, annotatorState.getFirstVisibleUnitAddress()));

        // Rebuild the curation container from scratch for the updated state
        SuggestionBuilder suggestionBuilder = new SuggestionBuilder(casStorageService, documentService, correctionDocumentService, curationDocumentService, annotationService, userRepository);
        curationContainer = suggestionBuilder.buildCurationContainer(annotatorState);
        setCurationSegmentBeginEnd(cas);
        curationContainer.setBratAnnotatorModel(annotatorState);

        update(aTarget);
        aTarget.appendJavaScript("Wicket.Window.unloadConfirmation = false;window.location.reload()");

        // Re-render the whole page because the width of the sidebar may have changed
        aTarget.add(this);
    } catch (Exception e) {
        handleException(aTarget, e);
    }
}
Also used : AnnotatorState(de.tudarmstadt.ukp.clarin.webanno.api.annotation.model.AnnotatorState) JCas(org.apache.uima.jcas.JCas) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) SuggestionBuilder(de.tudarmstadt.ukp.clarin.webanno.ui.curation.component.model.SuggestionBuilder) AnnotationException(de.tudarmstadt.ukp.clarin.webanno.api.annotation.exception.AnnotationException) UIMAException(org.apache.uima.UIMAException) IOException(java.io.IOException)

Example 40 with Sentence

use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence in project dkpro-tc by dkpro.

the class LinewiseTextReader method checkSetSentence.

/**
 * Adds one {@link Sentence} annotation spanning the complete document text (offset 0 up to the
 * text length) to the indexes of the given CAS, provided the {@code setSentence} flag is set.
 * Does nothing otherwise.
 *
 * @param aJCas
 *            the CAS that receives the sentence annotation
 */
private void checkSetSentence(JCas aJCas) {
    if (!setSentence) {
        return;
    }
    int documentLength = aJCas.getDocumentText().length();
    new Sentence(aJCas, 0, documentLength).addToIndexes();
}
Also used : Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)

Aggregations

Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)90 JCas (org.apache.uima.jcas.JCas)41 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)34 ArrayList (java.util.ArrayList)22 AnnotatorState (de.tudarmstadt.ukp.clarin.webanno.api.annotation.model.AnnotatorState)14 Type (org.apache.uima.cas.Type)12 AnnotationFS (org.apache.uima.cas.text.AnnotationFS)12 IOException (java.io.IOException)9 SourceDocument (de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument)8 POS (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS)8 Test (org.junit.Test)8 HashMap (java.util.HashMap)7 TokenBuilder (org.apache.uima.fit.testing.factory.TokenBuilder)7 AnnotationException (de.tudarmstadt.ukp.clarin.webanno.api.annotation.exception.AnnotationException)6 WebAnnoCasUtil.getFirstSentence (de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.getFirstSentence)6 AnnotationDocument (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationDocument)6 AnnotationFeature (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature)6 FrequencyDistribution (de.tudarmstadt.ukp.dkpro.core.api.frequency.util.FrequencyDistribution)6 CASException (org.apache.uima.cas.CASException)6 AutomationTypeAdapter (de.tudarmstadt.ukp.clarin.webanno.api.annotation.adapter.AutomationTypeAdapter)5