Search in sources :

Example 1 with Sentence

use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence in project webanno by webanno.

the class AnnotationPage method actionCompletePreferencesChange.

/**
 * Brings the page back in sync after the annotation preferences have been changed: the
 * first visible unit is re-applied to the annotator state, the layer dropdown of the detail
 * editor is updated, the annotation editor component is replaced by a freshly created one
 * and the page's AJAX-enabled children are refreshed.
 * <p>
 * Any exception raised while doing so is logged together with its stack trace and reported
 * to the user as a feedback message; it is not propagated to the caller.
 *
 * @param aTarget
 *            the AJAX request target used to refresh the page
 */
private void actionCompletePreferencesChange(AjaxRequestTarget aTarget) {
    try {
        AnnotatorState state = getModelObject();
        JCas jCas = getEditorCas();
        // The number of visible sentences may have changed - let the state recalculate
        // the visible sentences by re-setting the current first visible sentence
        Sentence sentence = selectByAddr(jCas, Sentence.class, state.getFirstVisibleUnitAddress());
        state.setFirstVisibleUnit(sentence);
        // The selection of layers may have changed. Update the dropdown
        detailEditor.getAnnotationFeatureForm().updateLayersDropdown();
        // Swap the editor component in the page and keep the field pointing at the new one
        AnnotationEditorBase newAnnotationEditor = createAnnotationEditor();
        annotationEditor.replaceWith(newAnnotationEditor);
        annotationEditor = newAnnotationEditor;
        // Reload all AJAX-enabled children of the page but not the page itself!
        WicketUtil.refreshPage(aTarget, getPage());
    } catch (Exception e) {
        // Pass the exception itself to the logger so that the stack trace and the cause are
        // preserved; the message alone is rarely enough to diagnose the failure.
        LOG.error("Error reading CAS " + e.getMessage(), e);
        error("Error reading CAS " + e.getMessage());
    }
}
Also used : AnnotationEditorBase(de.tudarmstadt.ukp.clarin.webanno.api.annotation.AnnotationEditorBase) AnnotatorState(de.tudarmstadt.ukp.clarin.webanno.api.annotation.model.AnnotatorState) JCas(org.apache.uima.jcas.JCas) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) NoResultException(javax.persistence.NoResultException) IOException(java.io.IOException)

Example 2 with Sentence

use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence in project webanno by webanno.

the class AnnotationPageBase method ensureRequiredFeatureValuesSet.

/**
 * Verifies that no annotation in the given CAS lacks a value for a required feature.
 * <p>
 * The layers of the current project are inspected one by one. As soon as an annotation with
 * a missing required feature value is found, the sentence at the begin offset of that
 * annotation is made the first visible unit, a document refresh is triggered and an
 * {@link IllegalStateException} describing the annotation, layer and feature is thrown.
 *
 * @param aTarget
 *            the AJAX request target handed to the document refresh
 * @param aJcas
 *            the CAS whose annotations are checked
 * @throws IllegalStateException
 *             if a required feature value is missing on some annotation
 */
protected void ensureRequiredFeatureValuesSet(AjaxRequestTarget aTarget, JCas aJcas) {
    AnnotatorState annotatorState = getModelObject();
    CAS cas = aJcas.getCas();
    for (AnnotationLayer layer : annotationService.listAnnotationLayer(annotatorState.getProject())) {
        TypeAdapter typeAdapter = annotationService.getAdapter(layer);
        List<AnnotationFeature> layerFeatures = annotationService.listAnnotationFeature(layer);
        // Layers without any required feature cannot fail the check - skip them entirely
        boolean hasRequiredFeature = layerFeatures.stream().anyMatch(AnnotationFeature::isRequired);
        if (!hasRequiredFeature) {
            continue;
        }
        // Inspect every feature structure of this layer against every feature of the layer
        for (AnnotationFS annotation : select(cas, typeAdapter.getAnnotationType(cas))) {
            for (AnnotationFeature feature : layerFeatures) {
                if (!WebAnnoCasUtil.isRequiredFeatureMissing(feature, annotation)) {
                    continue;
                }
                // Bring the sentence at the offending annotation's begin offset into focus
                Sentence enclosingSentence = WebAnnoCasUtil.getSentence(aJcas, annotation.getBegin());
                annotatorState.setFirstVisibleUnit(enclosingSentence);
                actionRefreshDocument(aTarget);
                // Tell the user which annotation, layer and feature is affected
                throw new IllegalStateException("Document cannot be marked as finished. Annotation with ID ["
                        + WebAnnoCasUtil.getAddr(annotation)
                        + "] on layer [" + layer.getUiName()
                        + "] is missing value for feature [" + feature.getUiName() + "].");
            }
        }
    }
}
Also used : AnnotationFS(org.apache.uima.cas.text.AnnotationFS) CAS(org.apache.uima.cas.CAS) AnnotatorState(de.tudarmstadt.ukp.clarin.webanno.api.annotation.model.AnnotatorState) TypeAdapter(de.tudarmstadt.ukp.clarin.webanno.api.annotation.adapter.TypeAdapter) AnnotationLayer(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationLayer) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) AnnotationFeature(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature)

Example 3 with Sentence

use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence in project webanno by webanno.

the class AnnotationSidebar_ImplBase method actionShowSelectedDocument.

/**
 * Opens the given document on the annotation page and moves the view to the sentence
 * that {@code WebAnnoCasUtil} resolves for the given begin offset.
 * <p>
 * The sentence number is written to the page's "go to" text field and stored as the focus
 * unit index; the sentence itself becomes the first visible unit. Finally, the document
 * view is refreshed.
 *
 * @param aTarget
 *            the AJAX request target used for opening and refreshing the document
 * @param aDocument
 *            the document to show
 * @param aBeginOffset
 *            the character offset used to look up the sentence to show
 * @throws IOException
 *             declared by this method; presumably raised when the document or its CAS
 *             cannot be loaded (verify against the called page methods)
 */
protected void actionShowSelectedDocument(AjaxRequestTarget aTarget, SourceDocument aDocument, int aBeginOffset) throws IOException {
    // Let the page switch to the requested document first
    annotationPage.actionShowSelectedDocument(aTarget, aDocument);

    AnnotatorState annotatorState = getModelObject();
    JCas editorCas = annotationPage.getEditorCas();

    // Resolve the offset to both the sentence number and the sentence annotation
    int targetSentenceNumber = WebAnnoCasUtil.getSentenceNumber(editorCas, aBeginOffset);
    Sentence targetSentence = WebAnnoCasUtil.getSentence(editorCas, aBeginOffset);

    // Reflect the new position in the page's text field and in the annotator state
    annotationPage.getGotoPageTextField().setModelObject(targetSentenceNumber);
    annotatorState.setFirstVisibleUnit(targetSentence);
    annotatorState.setFocusUnitIndex(targetSentenceNumber);

    annotationPage.actionRefreshDocument(aTarget);
}
Also used : AnnotatorState(de.tudarmstadt.ukp.clarin.webanno.api.annotation.model.AnnotatorState) JCas(org.apache.uima.jcas.JCas) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)

Example 4 with Sentence

use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence in project webanno by webanno.

the class BratAnnotatorUtility method clearJcasAnnotations.

/**
 * Creates a new CAS that shares the type system, document metadata, language, text, tokens
 * and sentences of the given CAS but carries none of its other annotations, writes that CAS
 * as the annotation CAS of the given document and user, and returns it.
 *
 * @param aJCas
 *            the CAS to take the type system, text and segmentation from
 * @param aSourceDocument
 *            the document the new CAS is written for
 * @param aUser
 *            the user the new CAS is written for
 * @param repository
 *            the service used to write the new CAS
 * @return the newly created CAS
 * @throws IOException
 *             if the CAS cannot be created or re-initialised (the UIMA exception is wrapped),
 *             or as declared by the write operation
 */
public static JCas clearJcasAnnotations(JCas aJCas, SourceDocument aSourceDocument, User aUser, DocumentService repository) throws IOException {
    JCas cleared;
    try {
        cleared = JCasFactory.createJCas();
        // Round-trip through the complete serializer - this is done only to carry the full
        // type system information over into the new CAS
        CASCompleteSerializer snapshot = serializeCASComplete(aJCas.getCasImpl());
        deserializeCASComplete(snapshot, (CASImpl) cleared.getCas());
        // Re-init JCas
        cleared.getCas().getJCas();
    } catch (UIMAException e) {
        // CASException is a UIMAException, so creation and re-init failures are both wrapped here
        throw new IOException(e);
    }

    // Drop all annotations from the new CAS - the type system stays
    cleared.reset();

    // Copy over essential information
    DocumentMetaData.copy(aJCas, cleared);
    // DKPro Core Issue 435
    cleared.setDocumentLanguage(aJCas.getDocumentLanguage());
    cleared.setDocumentText(aJCas.getDocumentText());

    // Re-create the token boundaries in the new CAS
    for (Token sourceToken : select(aJCas, Token.class)) {
        Token copiedToken = new Token(cleared, sourceToken.getBegin(), sourceToken.getEnd());
        copiedToken.addToIndexes();
    }
    // Re-create the sentence boundaries in the new CAS
    for (Sentence sourceSentence : select(aJCas, Sentence.class)) {
        Sentence copiedSentence = new Sentence(cleared, sourceSentence.getBegin(), sourceSentence.getEnd());
        copiedSentence.addToIndexes();
    }

    repository.writeAnnotationCas(cleared, aSourceDocument, aUser, false);
    return cleared;
}
Also used : CASCompleteSerializer(org.apache.uima.cas.impl.CASCompleteSerializer) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) IOException(java.io.IOException) CASException(org.apache.uima.cas.CASException) UIMAException(org.apache.uima.UIMAException) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)

Example 5 with Sentence

use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence in project webanno by webanno.

the class Tsv3XCasDocumentBuilder method of.

/**
 * Builds a {@link TsvDocument} (format header "WebAnno TSV" 3.2) from the given CAS using
 * the given schema.
 * <p>
 * The method proceeds in these steps:
 * <ol>
 * <li>create a TSV sentence for every UIMA sentence and a TSV token for every token covered
 * by it;</li>
 * <li>collect the elements of every chain found via the schema's chain head types;</li>
 * <li>attach every annotation of the schema's UIMA types to the tokens it spans, creating
 * sub-tokens where the annotation offsets do not coincide with token boundaries;</li>
 * <li>scan all tokens and sub-tokens for active columns and ambiguous slot references;</li>
 * <li>activate placeholder columns for active types that have no other active column.</li>
 * </ol>
 *
 * @param aSchema
 *            the schema providing the chain head types, UIMA types, layer types and columns
 * @param aJCas
 *            the CAS to read sentences, tokens and annotations from
 * @return the populated TSV document
 */
public static TsvDocument of(TsvSchema aSchema, JCas aJCas) {
    TsvFormatHeader format = new TsvFormatHeader("WebAnno TSV", "3.2");
    TsvDocument doc = new TsvDocument(format, aSchema, aJCas);
    // Fill document with all the sentences and tokens
    for (Sentence uimaSentence : select(aJCas, Sentence.class)) {
        TsvSentence sentence = doc.createSentence(uimaSentence);
        for (Token uimaToken : selectCovered(Token.class, uimaSentence)) {
            sentence.createToken(uimaToken);
        }
    }
    // Scan for chains: follow the links from CHAIN_FIRST_FEAT via CHAIN_NEXT_FEAT until no
    // further link is found and register non-empty chains with the document
    for (Type headType : aSchema.getChainHeadTypes()) {
        for (FeatureStructure chainHead : CasUtil.selectFS(aJCas.getCas(), headType)) {
            List<AnnotationFS> elements = new ArrayList<>();
            AnnotationFS link = getFeature(chainHead, CHAIN_FIRST_FEAT, AnnotationFS.class);
            while (link != null) {
                elements.add(link);
                link = getFeature(link, CHAIN_NEXT_FEAT, AnnotationFS.class);
            }
            if (!elements.isEmpty()) {
                // The element type is the range of the head's "first" feature
                Type elementType = headType.getFeatureByBaseName(CHAIN_FIRST_FEAT).getRange();
                doc.createChain(headType, elementType, elements);
            }
        }
    }
    // Build indexes over the token start and end positions such that we can quickly locate
    // tokens based on their offsets. The flat token list keeps the tokens in document
    // iteration order so that the tokens between two tokens can be walked later on.
    NavigableMap<Integer, TsvToken> tokenBeginIndex = new TreeMap<>();
    NavigableMap<Integer, TsvToken> tokenEndIndex = new TreeMap<>();
    List<TsvToken> tokens = new ArrayList<>();
    for (TsvSentence sentence : doc.getSentences()) {
        for (TsvToken token : sentence.getTokens()) {
            tokenBeginIndex.put(token.getBegin(), token);
            tokenEndIndex.put(token.getEnd(), token);
            tokens.add(token);
        }
    }
    // Walk over all annotations of the types defined in the schema and attach them to the
    // token units, creating sub-token units where necessary.
    for (Type type : aSchema.getUimaTypes()) {
        LayerType layerType = aSchema.getLayerType(type);
        // Only span layers pass "true" for the disambiguation flag of addUimaAnnotation
        boolean addDisambiguationIdIfStacked = SPAN.equals(layerType);
        for (AnnotationFS annotation : CasUtil.select(aJCas.getCas(), type)) {
            doc.activateType(annotation.getType());
            // Get the relevant begin and end offsets for the current annotation
            int begin = annotation.getBegin();
            int end = annotation.getEnd();
            // For relation layers, the offsets of the relation target (FEAT_REL_TARGET) are
            // used instead of the relation annotation's own offsets.
            // NOTE(review): targetFS is dereferenced without a null check - confirm that a
            // relation always has a target at this point.
            if (RELATION.equals(layerType)) {
                AnnotationFS targetFS = getFeature(annotation, FEAT_REL_TARGET, AnnotationFS.class);
                begin = targetFS.getBegin();
                end = targetFS.getEnd();
            }
            // Begin token: the token with the greatest begin offset <= begin.
            // End token: the token with the smallest end offset >= end.
            // NOTE(review): floorEntry/ceilingEntry return null when no such token exists
            // (e.g. an annotation before the first or after the last token), which would
            // raise a NullPointerException here - confirm this cannot happen.
            TsvToken beginToken = tokenBeginIndex.floorEntry(begin).getValue();
            TsvToken endToken = tokenEndIndex.ceilingEntry(end).getValue();
            // For zero-width annotations, the token located via the end index is used as the
            // begin token as well, replacing the value obtained from the tokenBeginIndex.
            if (begin == end) {
                beginToken = endToken;
            }
            boolean singleToken = beginToken == endToken;
            boolean zeroWitdh = begin == end;
            boolean multiTokenCapable = SPAN.equals(layerType) || CHAIN.equals(layerType);
            // Case 1: the annotation offsets coincide exactly with the token boundaries. The
            // annotation is mapped to the begin token and no sub-token is needed. For
            // multi-token capable layers it is also added to the end token.
            if (beginToken.getBegin() == begin && endToken.getEnd() == end) {
                doc.mapFS2Unit(annotation, beginToken);
                beginToken.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                if (multiTokenCapable) {
                    endToken.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                }
            } else if (zeroWitdh) {
                // Case 2: a zero-width annotation that does not satisfy case 1. It gets its
                // own sub-token on the begin token and is mapped to that sub-token.
                TsvSubToken t = beginToken.createSubToken(begin, min(beginToken.getEnd(), end));
                doc.mapFS2Unit(annotation, t);
                t.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
            } else {
                // Case 3: the annotation is not aligned with the token boundaries on at least
                // one side. If it starts inside the begin token, a sub-token covering the
                // overlap is created and the annotation is mapped to that sub-token.
                if (beginToken.getBegin() < begin) {
                    TsvSubToken t = beginToken.createSubToken(begin, min(beginToken.getEnd(), end));
                    doc.mapFS2Unit(annotation, t);
                    t.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                } else // Otherwise no sub-token is created at the begin side and the annotation is mapped to the begin token itself
                {
                    beginToken.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                    doc.mapFS2Unit(annotation, beginToken);
                }
                // If the annotation ends inside the end token, a sub-token covering the
                // overlap is created. mapFS2Unit is called for it only if begin and end
                // token differ, i.e. only if singleToken is false.
                if (endToken.getEnd() > end) {
                    TsvSubToken t = endToken.createSubToken(max(endToken.getBegin(), begin), end);
                    t.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                    if (!singleToken) {
                        doc.mapFS2Unit(annotation, t);
                    }
                } else if (!singleToken && multiTokenCapable) {
                    endToken.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                }
            }
            // For multi-token capable layers, also add the annotation to every token that
            // lies after the begin token and before the end token.
            // NOTE(review): indexOf is a linear search per annotation, and i.next() throws
            // NoSuchElementException if endToken does not follow beginToken in the list.
            if (multiTokenCapable && !singleToken) {
                ListIterator<TsvToken> i = tokens.listIterator(tokens.indexOf(beginToken));
                TsvToken t;
                while ((t = i.next()) != endToken) {
                    if (t != beginToken) {
                        t.addUimaAnnotation(annotation, addDisambiguationIdIfStacked);
                    }
                }
            }
            // Multi-token span annotations must get a disambiguation ID
            if (SPAN.equals(layerType) && !singleToken) {
                doc.addDisambiguationId(annotation);
            }
        }
    }
    // Scan all created units (tokens and their sub-tokens) to see which columns actually
    // contain values and which slot references are ambiguous
    for (TsvSentence sentence : doc.getSentences()) {
        for (TsvToken token : sentence.getTokens()) {
            scanUnitForActiveColumns(token);
            scanUnitForAmbiguousSlotReferences(token);
            for (TsvSubToken subToken : token.getSubTokens()) {
                scanUnitForActiveColumns(subToken);
                scanUnitForAmbiguousSlotReferences(subToken);
            }
        }
    }
    // Activate the placeholder columns for any active types for which no other columns are
    // active: start from all active types and remove those that already own an active column.
    Set<Type> activeTypesNeedingPlaceholders = new HashSet<>(doc.getActiveTypes());
    for (TsvColumn col : doc.getActiveColumns()) {
        activeTypesNeedingPlaceholders.remove(col.uimaType);
    }
    for (TsvColumn col : doc.getSchema().getColumns()) {
        if (PLACEHOLDER.equals(col.featureType) && activeTypesNeedingPlaceholders.contains(col.uimaType)) {
            doc.activateColumn(col);
        }
    }
    return doc;
}
Also used : TsvFormatHeader(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvFormatHeader) ArrayList(java.util.ArrayList) TsvToken(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvToken) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) TsvSubToken(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSubToken) TsvSentence(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSentence) TreeMap(java.util.TreeMap) TsvSubToken(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSubToken) FeatureStructure(org.apache.uima.cas.FeatureStructure) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) LayerType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType) Type(org.apache.uima.cas.Type) TsvColumn(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvColumn) LayerType(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.LayerType) TsvDocument(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvDocument) TsvToken(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvToken) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) TsvSentence(de.tudarmstadt.ukp.clarin.webanno.tsv.internal.tsv3x.model.TsvSentence) HashSet(java.util.HashSet)

Aggregations

Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)90 JCas (org.apache.uima.jcas.JCas)41 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)34 ArrayList (java.util.ArrayList)22 AnnotatorState (de.tudarmstadt.ukp.clarin.webanno.api.annotation.model.AnnotatorState)14 Type (org.apache.uima.cas.Type)12 AnnotationFS (org.apache.uima.cas.text.AnnotationFS)12 IOException (java.io.IOException)9 SourceDocument (de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument)8 POS (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS)8 Test (org.junit.Test)8 HashMap (java.util.HashMap)7 TokenBuilder (org.apache.uima.fit.testing.factory.TokenBuilder)7 AnnotationException (de.tudarmstadt.ukp.clarin.webanno.api.annotation.exception.AnnotationException)6 WebAnnoCasUtil.getFirstSentence (de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.getFirstSentence)6 AnnotationDocument (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationDocument)6 AnnotationFeature (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature)6 FrequencyDistribution (de.tudarmstadt.ukp.dkpro.core.api.frequency.util.FrequencyDistribution)6 CASException (org.apache.uima.cas.CASException)6 AutomationTypeAdapter (de.tudarmstadt.ukp.clarin.webanno.api.annotation.adapter.AutomationTypeAdapter)5