Search in sources :

Example 71 with Sentence

use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence in project webanno by webanno.

the class SuggestionBuilder method updateSegment.

/**
 * Remembers the given window as the current diff range and indexes every sentence covered by
 * that window: sentence begin to sentence end, sentence begin to running sentence number, and
 * (per user) sentence begin to the address of the sentence annotation.
 *
 * @param aBratAnnotatorModel
 *            the annotator state (not read by this method).
 * @param aIdxSentenceBeginEnd
 *            receives one entry per sentence: begin offset to end offset.
 * @param aIdxSentenceBeginNumber
 *            receives one entry per sentence: begin offset to sentence number.
 * @param aSegmentAdress
 *            receives a fresh map for {@code aUsername} (replacing any previous one) from
 *            sentence begin offset to sentence address.
 * @param aJCas
 *            the JCas the sentences are read from.
 * @param aUsername
 *            the key under which the sentence addresses are stored.
 * @param aWindowStart
 *            begin of the window; stored as the diff range begin.
 * @param aWindowEnd
 *            end of the window; stored as the diff range end.
 */
private void updateSegment(AnnotatorState aBratAnnotatorModel, Map<Integer, Integer> aIdxSentenceBeginEnd, Map<Integer, Integer> aIdxSentenceBeginNumber, Map<String, Map<Integer, Integer>> aSegmentAdress, JCas aJCas, String aUsername, int aWindowStart, int aWindowEnd) {
    diffRangeBegin = aWindowStart;
    diffRangeEnd = aWindowEnd;

    // Look up the number of the first sentence once; every following sentence simply gets
    // the next number, so no further lookups are needed.
    int nextNumber = WebAnnoCasUtil.getSentenceNumber(aJCas, diffRangeBegin);

    // Start with an empty address map for this user and keep a handle on it for the loop.
    Map<Integer, Integer> addressByBegin = new HashMap<>();
    aSegmentAdress.put(aUsername, addressByBegin);

    for (Sentence current : selectCovered(aJCas, Sentence.class, diffRangeBegin, diffRangeEnd)) {
        int begin = current.getBegin();
        aIdxSentenceBeginEnd.put(begin, current.getEnd());
        aIdxSentenceBeginNumber.put(begin, nextNumber);
        addressByBegin.put(begin, getAddr(current));
        nextNumber++;
    }
}
Also used : Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)

Example 72 with Sentence

use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence in project webanno by webanno.

the class CasDiff2Test method relationStackedSpansTest.

/**
 * Two users annotate the same relation over the same text; the first user additionally has a
 * second (stacked) named entity at the governor position. The test expects a single diff
 * position with no differing and no incomplete configuration sets, and one plurality set in
 * the agreement result.
 */
@Test
public void relationStackedSpansTest() throws Exception {
    final String relationLayer = "webanno.custom.Relation";
    final String text = "This is a test .";

    TypeSystemDescription builtInTypes = TypeSystemDescriptionFactory.createTypeSystemDescription();
    TypeSystemDescription customTypes = TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath("src/test/resources/desc/type/webannoTestTypes.xml");
    TypeSystemDescription merged = CasCreationUtils.mergeTypeSystems(asList(builtInTypes, customTypes));
    TokenBuilder<Token, Sentence> tokenBuilder = new TokenBuilder<>(Token.class, Sentence.class);

    // First user: relation from the last token to the first token, with a stacked entity on
    // the first token.
    JCas stackedJCas = JCasFactory.createJCas(merged);
    CAS stackedCas = stackedJCas.getCas();
    tokenBuilder.buildTokens(stackedJCas, text);
    List<Token> stackedTokens = new ArrayList<>(select(stackedJCas, Token.class));
    Token stackedFirst = stackedTokens.get(0);
    Token stackedLast = stackedTokens.get(stackedTokens.size() - 1);
    NamedEntity stackedGovernor = new NamedEntity(stackedJCas, stackedFirst.getBegin(), stackedFirst.getEnd());
    stackedGovernor.addToIndexes();
    // The stacked named entity: same span as the governor.
    NamedEntity stackedDuplicate = new NamedEntity(stackedJCas, stackedFirst.getBegin(), stackedFirst.getEnd());
    stackedDuplicate.addToIndexes();
    NamedEntity stackedDependent = new NamedEntity(stackedJCas, stackedLast.getBegin(), stackedLast.getEnd());
    stackedDependent.addToIndexes();
    Type stackedRelationType = stackedCas.getTypeSystem().getType(relationLayer);
    AnnotationFS stackedRelation = stackedCas.createAnnotation(stackedRelationType, stackedDependent.getBegin(), stackedDependent.getEnd());
    FSUtil.setFeature(stackedRelation, "Governor", stackedGovernor);
    FSUtil.setFeature(stackedRelation, "Dependent", stackedDependent);
    FSUtil.setFeature(stackedRelation, "value", "REL");
    stackedCas.addFsToIndexes(stackedRelation);

    // Second user: the same relation, but without the stacked entity.
    JCas plainJCas = JCasFactory.createJCas(merged);
    CAS plainCas = plainJCas.getCas();
    tokenBuilder.buildTokens(plainJCas, text);
    List<Token> plainTokens = new ArrayList<>(select(plainJCas, Token.class));
    Token plainFirst = plainTokens.get(0);
    Token plainLast = plainTokens.get(plainTokens.size() - 1);
    NamedEntity plainGovernor = new NamedEntity(plainJCas, plainFirst.getBegin(), plainFirst.getEnd());
    plainGovernor.addToIndexes();
    NamedEntity plainDependent = new NamedEntity(plainJCas, plainLast.getBegin(), plainLast.getEnd());
    plainDependent.addToIndexes();
    Type plainRelationType = plainCas.getTypeSystem().getType(relationLayer);
    AnnotationFS plainRelation = plainCas.createAnnotation(plainRelationType, plainDependent.getBegin(), plainDependent.getEnd());
    FSUtil.setFeature(plainRelation, "Governor", plainGovernor);
    FSUtil.setFeature(plainRelation, "Dependent", plainDependent);
    FSUtil.setFeature(plainRelation, "value", "REL");
    plainCas.addFsToIndexes(plainRelation);

    Map<String, List<JCas>> casByUser = new LinkedHashMap<>();
    casByUser.put("user1", asList(stackedJCas));
    casByUser.put("user2", asList(plainJCas));

    List<String> entryTypes = asList(relationLayer);
    List<? extends DiffAdapter> diffAdapters = asList(new ArcDiffAdapter(relationLayer, WebAnnoConst.FEAT_REL_TARGET, WebAnnoConst.FEAT_REL_SOURCE, "value"));
    DiffResult diff = CasDiff2.doDiff(entryTypes, diffAdapters, LinkCompareBehavior.LINK_TARGET_AS_LABEL, casByUser);
    diff.print(System.out);

    assertEquals(1, diff.size());
    assertEquals(0, diff.getDifferingConfigurationSets().size());
    assertEquals(0, diff.getIncompleteConfigurationSets().size());

    // Cross-check with the agreement computation.
    AgreementResult agreement = AgreementUtils.getCohenKappaAgreement(diff, relationLayer, "value", casByUser);
    System.out.printf("Agreement: %s%n", agreement.toString());
    AgreementUtils.dumpAgreementStudy(System.out, agreement);
    assertEquals(1, agreement.getPluralitySets().size());
}
Also used : AgreementResult(de.tudarmstadt.ukp.clarin.webanno.curation.agreement.AgreementUtils.AgreementResult) TokenBuilder(org.apache.uima.fit.testing.factory.TokenBuilder) TypeSystemDescription(org.apache.uima.resource.metadata.TypeSystemDescription) ArcDiffAdapter(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.ArcDiffAdapter) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) LinkedHashMap(java.util.LinkedHashMap) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) NamedEntity(de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity) Type(org.apache.uima.cas.Type) CAS(org.apache.uima.cas.CAS) ArrayList(java.util.ArrayList) Arrays.asList(java.util.Arrays.asList) List(java.util.List) DiffResult(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.DiffResult) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) Test(org.junit.Test)

Example 73 with Sentence

use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence in project webanno by webanno.

the class BratAjaxCasUtilTest method testIsSameSentence.

/**
 * Builds a document with two sentences separated by a single space and checks
 * {@code isSameSentence} for offset pairs inside one sentence (in both argument orders),
 * across the two sentences, and for the pair (0, 0).
 */
@Test
public void testIsSameSentence() throws Exception {
    JCas jcas = JCasFactory.createJCas();
    JCasBuilder builder = new JCasBuilder(jcas);
    Sentence first = builder.add("Sentence 1.", Sentence.class);
    builder.add(" ");
    Sentence second = builder.add("Sentence 2.", Sentence.class);
    builder.close();

    int firstBegin = first.getBegin();
    int firstEnd = first.getEnd();
    int secondBegin = second.getBegin();
    int secondEnd = second.getEnd();

    // Boundaries of the second sentence, both argument orders.
    assertTrue(isSameSentence(jcas, secondBegin, secondEnd));
    assertTrue(isSameSentence(jcas, secondEnd, secondBegin));

    // Offsets strictly inside the first sentence, both argument orders.
    assertTrue(isSameSentence(jcas, firstBegin + 1, firstEnd - 1));
    assertTrue(isSameSentence(jcas, firstEnd - 1, firstBegin + 1));

    // Boundaries of the first sentence, both argument orders.
    assertTrue(isSameSentence(jcas, firstBegin, firstEnd));
    assertTrue(isSameSentence(jcas, firstEnd, firstBegin));

    // Offsets belonging to different sentences.
    assertFalse(isSameSentence(jcas, secondBegin, firstBegin));
    assertFalse(isSameSentence(jcas, firstBegin, secondBegin));

    // Identical offsets at the very start of the document.
    assertTrue(isSameSentence(jcas, 0, 0));
}
Also used : JCasBuilder(org.apache.uima.fit.factory.JCasBuilder) JCas(org.apache.uima.jcas.JCas) WebAnnoCasUtil.isSameSentence(de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.isSameSentence) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) Test(org.junit.Test)

Example 74 with Sentence

use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence in project webanno by webanno.

the class BratRenderer method render.

/**
 * Populates a BRAT document response from a visual document: display settings, the token and
 * sentence baseline, then per layer all spans (as entities) and arcs (as relations), followed
 * by comments and markers.
 *
 * @param aResponse
 *            the response to populate.
 * @param aState
 *            the annotator model; supplies script direction and preferences.
 * @param aVDoc
 *            the visual document providing layers, spans, arcs, comments and markers.
 * @param aJCas
 *            the JCas.
 * @param aAnnotationService
 *            the annotation service.
 * @param aColoringStrategy
 *            the coloring strategy to use for every layer; if {@code null}, a strategy is
 *            obtained per layer via {@code ColoringStrategy.getStrategy}.
 */
public static void render(GetDocumentResponse aResponse, AnnotatorState aState, VDocument aVDoc, JCas aJCas, AnnotationSchemaService aAnnotationService, ColoringStrategy aColoringStrategy) {
    aResponse.setRtlMode(ScriptDirection.RTL.equals(aState.getScriptDirection()));
    aResponse.setFontZoom(aState.getPreferences().getFontZoom());

    // Invisible baseline annotations (sentences, tokens) come first.
    renderTokenAndSentence(aJCas, aResponse, aState);

    // Visible (custom) layers.
    Map<String[], Queue<String>> colorQueues = new HashMap<>();
    for (AnnotationLayer layer : aVDoc.getAnnotationLayers()) {
        ColoringStrategy layerColoring = aColoringStrategy;
        if (layerColoring == null) {
            layerColoring = ColoringStrategy.getStrategy(aAnnotationService, layer, aState.getPreferences(), colorQueues);
        }
        TypeAdapter adapter = aAnnotationService.getAdapter(layer);

        for (VSpan span : aVDoc.spans(layer.getId())) {
            List<Offsets> ranges = toOffsets(span.getRanges());
            String label = TypeUtil.getUiLabelText(adapter, span.getFeatures());
            String hover = TypeUtil.getUiHoverText(adapter, span.getHoverFeatures());
            // An explicit color hint on the span wins over the coloring strategy.
            String color = span.getColorHint() != null ? span.getColorHint() : getColor(span, layerColoring, label);
            aResponse.addEntity(new Entity(span.getVid(), span.getType(), ranges, label, color, hover));
        }

        for (VArc arc : aVDoc.arcs(layer.getId())) {
            // Explicit label and color hints on the arc win over the computed values.
            String label = arc.getLabelHint() != null ? arc.getLabelHint() : TypeUtil.getUiLabelText(adapter, arc.getFeatures());
            String color = arc.getColorHint() != null ? arc.getColorHint() : getColor(arc, layerColoring, label);
            aResponse.addRelation(new Relation(arc.getVid(), arc.getType(), getArgument(arc.getSource(), arc.getTarget()), label, color));
        }
    }

    // Comments: those attached to a sentence annotation are sent as sentence comments
    // addressed by the 1-based position of the sentence; all others as annotation comments.
    List<Sentence> sentences = new ArrayList<>(select(aJCas, Sentence.class));
    for (VComment comment : aVDoc.comments()) {
        String type;
        switch (comment.getCommentType()) {
            case ERROR:
                type = AnnotationComment.ANNOTATION_ERROR;
                break;
            case YIELD:
                type = "Yield";
                break;
            case INFO:
            default:
                type = AnnotationComment.ANNOTATOR_NOTES;
                break;
        }

        // Only non-synthetic ids are resolved against the CAS.
        AnnotationFS target = null;
        if (!comment.getVid().isSynthetic()) {
            target = selectByAddr(aJCas, comment.getVid().getId());
        }

        if (target instanceof Sentence) {
            aResponse.addComment(new SentenceComment(sentences.indexOf(target) + 1, type, comment.getComment()));
        } else {
            aResponse.addComment(new AnnotationComment(comment.getVid(), type, comment.getComment()));
        }
    }

    // Markers.
    for (VMarker vmarker : aVDoc.getMarkers()) {
        if (vmarker instanceof VAnnotationMarker) {
            aResponse.addMarker(new AnnotationMarker(vmarker.getType(), ((VAnnotationMarker) vmarker).getVid()));
        } else if (vmarker instanceof VSentenceMarker) {
            aResponse.addMarker(new SentenceMarker(vmarker.getType(), ((VSentenceMarker) vmarker).getIndex()));
        } else if (vmarker instanceof VTextMarker) {
            VTextMarker textMarker = (VTextMarker) vmarker;
            aResponse.addMarker(new TextMarker(textMarker.getType(), textMarker.getBegin(), textMarker.getEnd()));
        } else {
            LOG.warn("Unknown how to render marker: [" + vmarker + "]");
        }
    }
}
Also used : Entity(de.tudarmstadt.ukp.clarin.webanno.brat.render.model.Entity) VAnnotationMarker(de.tudarmstadt.ukp.clarin.webanno.api.annotation.rendering.model.VAnnotationMarker) VTextMarker(de.tudarmstadt.ukp.clarin.webanno.api.annotation.rendering.model.VTextMarker) HashMap(java.util.HashMap) AnnotationMarker(de.tudarmstadt.ukp.clarin.webanno.brat.render.model.AnnotationMarker) VAnnotationMarker(de.tudarmstadt.ukp.clarin.webanno.api.annotation.rendering.model.VAnnotationMarker) ArrayList(java.util.ArrayList) ColoringStrategy(de.tudarmstadt.ukp.clarin.webanno.api.annotation.coloring.ColoringStrategy) AnnotationLayer(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationLayer) Offsets(de.tudarmstadt.ukp.clarin.webanno.brat.render.model.Offsets) TextMarker(de.tudarmstadt.ukp.clarin.webanno.brat.render.model.TextMarker) VTextMarker(de.tudarmstadt.ukp.clarin.webanno.api.annotation.rendering.model.VTextMarker) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) VSentenceMarker(de.tudarmstadt.ukp.clarin.webanno.api.annotation.rendering.model.VSentenceMarker) SentenceMarker(de.tudarmstadt.ukp.clarin.webanno.brat.render.model.SentenceMarker) Relation(de.tudarmstadt.ukp.clarin.webanno.brat.render.model.Relation) Queue(java.util.Queue) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) VSpan(de.tudarmstadt.ukp.clarin.webanno.api.annotation.rendering.model.VSpan) VComment(de.tudarmstadt.ukp.clarin.webanno.api.annotation.rendering.model.VComment) AnnotationComment(de.tudarmstadt.ukp.clarin.webanno.brat.render.model.AnnotationComment) VSentenceMarker(de.tudarmstadt.ukp.clarin.webanno.api.annotation.rendering.model.VSentenceMarker) VArc(de.tudarmstadt.ukp.clarin.webanno.api.annotation.rendering.model.VArc) TypeAdapter(de.tudarmstadt.ukp.clarin.webanno.api.annotation.adapter.TypeAdapter) SentenceComment(de.tudarmstadt.ukp.clarin.webanno.brat.render.model.SentenceComment) VMarker(de.tudarmstadt.ukp.clarin.webanno.api.annotation.rendering.model.VMarker)

Example 75 with Sentence

use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence in project webanno by webanno.

the class WebannoTsv3Reader method createSentence.

/**
 * Appends the text of one sentence line (followed by a line feed) to the document text being
 * assembled and adds a {@link Sentence} annotation for the given offsets to the CAS.
 *
 * @param aJCas
 *            the JCas receiving the sentence annotation.
 * @param aLine
 *            the sentence text.
 * @param aBegin
 *            begin offset of the sentence.
 * @param aEnd
 *            end offset of the sentence.
 * @param aPrevEnd
 *            end offset of the previous sentence.
 */
private void createSentence(JCas aJCas, String aLine, int aBegin, int aEnd, int aPrevEnd) {
    // When this sentence starts exactly where the previous one ended, drop the line feed
    // that was appended after the previous sentence - otherwise offsets will be off on a
    // round-trip.
    int textLength = coveredText.length();
    boolean startsAtPreviousEnd = aPrevEnd == aBegin;
    if (startsAtPreviousEnd && textLength > 0 && coveredText.charAt(textLength - 1) == '\n') {
        coveredText.deleteCharAt(textLength - 1);
    }

    // Fill the gap between the sentences with one space for every offset after aPrevEnd + 1
    // up to (excluding) aBegin. The loop does nothing when there is no such gap.
    for (int offset = aPrevEnd + 1; offset < aBegin; offset++) {
        coveredText.append(" ");
    }
    coveredText.append(aLine).append(LF);

    new Sentence(aJCas, aBegin, aEnd).addToIndexes();
}
Also used : Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)

Aggregations

Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)90 JCas (org.apache.uima.jcas.JCas)41 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)34 ArrayList (java.util.ArrayList)22 AnnotatorState (de.tudarmstadt.ukp.clarin.webanno.api.annotation.model.AnnotatorState)14 Type (org.apache.uima.cas.Type)12 AnnotationFS (org.apache.uima.cas.text.AnnotationFS)12 IOException (java.io.IOException)9 SourceDocument (de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument)8 POS (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS)8 Test (org.junit.Test)8 HashMap (java.util.HashMap)7 TokenBuilder (org.apache.uima.fit.testing.factory.TokenBuilder)7 AnnotationException (de.tudarmstadt.ukp.clarin.webanno.api.annotation.exception.AnnotationException)6 WebAnnoCasUtil.getFirstSentence (de.tudarmstadt.ukp.clarin.webanno.api.annotation.util.WebAnnoCasUtil.getFirstSentence)6 AnnotationDocument (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationDocument)6 AnnotationFeature (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature)6 FrequencyDistribution (de.tudarmstadt.ukp.dkpro.core.api.frequency.util.FrequencyDistribution)6 CASException (org.apache.uima.cas.CASException)6 AutomationTypeAdapter (de.tudarmstadt.ukp.clarin.webanno.api.annotation.adapter.AutomationTypeAdapter)5