Search in sources :

Example 41 with CasDiff2

use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2 in project webanno by webanno.

the class SuggestionBuilder method createCurationCas.

/**
 * Creates the curation (merge) CAS the first time a curation page is opened for a document.
 * <p>
 * The CAS of the given annotation document is read with the CAS storage cache switched off and
 * registered in {@code jCases} under {@link WebAnnoConst#CURATION_USER}. A diff over the whole
 * document text is then calculated across all entries of {@code jCases}, the result is re-merged
 * into a single CAS (intended to keep only the annotations the annotators agree on) and that CAS
 * is written as the curation CAS of the document.
 *
 * @param aProject
 *            the project the document belongs to.
 * @param randomAnnotationDocument
 *            the annotation document whose CAS serves as the starting point.
 * @param jCases
 *            the CASes to compare, keyed by user name; this map is modified: an entry for the
 *            curation user is added.
 * @param aAnnotationLayers
 *            the layers from which the types to compare are derived.
 * @return the merged CAS that has been written as curation CAS.
 * @throws IOException
 *             if an I/O error occurs.
 */
public JCas createCurationCas(Project aProject, AnnotationDocument randomAnnotationDocument, Map<String, JCas> jCases, List<AnnotationLayer> aAnnotationLayers) throws IOException {
    // Remember the cache state up front so it can be restored after the uncached read.
    final boolean cacheWasEnabled = casStorageService.isCacheEnabled();
    JCas baseCas;
    try {
        casStorageService.disableCache();
        baseCas = documentService.readAnnotationCas(randomAnnotationDocument);
    } finally {
        // Only switch the cache back on if it was on before we touched it.
        if (cacheWasEnabled) {
            casStorageService.enableCache();
        }
    }

    // The freshly read CAS takes part in the diff as the curation user's CAS.
    jCases.put(WebAnnoConst.CURATION_USER, baseCas);

    List<Type> typesToCompare = getEntryTypes(baseCas, aAnnotationLayers, annotationService);
    int documentLength = baseCas.getDocumentText().length();
    DiffResult wholeDocumentDiff = CasDiff2.doDiffSingle(annotationService, aProject, typesToCompare, LinkCompareBehavior.LINK_ROLE_AS_LABEL, jCases, 0, documentLength);

    JCas curationCas = MergeCas.reMergeCas(wholeDocumentDiff, jCases);
    curationDocumentService.writeCurationCas(curationCas, randomAnnotationDocument.getDocument(), false);
    return curationCas;
}
Also used : Type(org.apache.uima.cas.Type) JCas(org.apache.uima.jcas.JCas) DiffResult(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.DiffResult)

Example 42 with CasDiff2

use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2 in project webanno by webanno.

the class SuggestionBuilder method buildCurationContainer.

/**
 * Builds the {@link CurationContainer} for the document of the given annotator state.
 * <p>
 * Depending on the mode of the state, the CASes to compare are obtained either for
 * correction/automation or for curation. For every segment recorded in the begin/end map a diff
 * is calculated, and a {@link SourceListView} carrying the segment offsets, sentence number,
 * agreement state and per-user sentence addresses is stored in the container under the segment's
 * begin offset.
 *
 * @param aBModel
 *            the annotator state providing document, project, mode, window offsets and layers.
 * @return the populated curation container.
 * @throws UIMAException
 *             declared by this method; raised by the invoked CAS operations.
 * @throws ClassNotFoundException
 *             declared by this method; raised by the invoked helpers.
 * @throws IOException
 *             if an I/O error occurs.
 * @throws AnnotationException
 *             declared by this method; raised by the invoked helpers.
 */
public CurationContainer buildCurationContainer(AnnotatorState aBModel) throws UIMAException, ClassNotFoundException, IOException, AnnotationException {
    CurationContainer container = new CurationContainer();
    SourceDocument sourceDocument = aBModel.getDocument();

    // segment begin -> segment end
    Map<Integer, Integer> segmentBeginEnd = new HashMap<>();
    // segment begin -> sentence number
    Map<Integer, Integer> segmentNumber = new HashMap<>();
    // user name -> (sentence begin -> sentence address)
    Map<String, Map<Integer, Integer>> segmentAdress = new HashMap<>();

    // Collect the annotation documents that are marked as finished.
    List<AnnotationDocument> finishedAnnotationDocuments = new ArrayList<>();
    for (AnnotationDocument candidate : documentService.listAnnotationDocuments(aBModel.getDocument())) {
        if (!candidate.getState().equals(AnnotationDocumentState.FINISHED)) {
            continue;
        }
        finishedAnnotationDocuments.add(candidate);
    }

    // NOTE(review): this reference is never assigned here and is handed to the helpers as null.
    AnnotationDocument randomAnnotationDocument = null;
    Map<String, JCas> jCases;
    JCas curationCas;

    boolean correctionOrAutomation = aBModel.getMode().equals(Mode.AUTOMATION) || aBModel.getMode().equals(Mode.CORRECTION);
    if (correctionOrAutomation) {
        // Correction/automation: the segments follow the window offsets of the state.
        jCases = listJcasesforCorrection(randomAnnotationDocument, sourceDocument, aBModel.getMode());
        curationCas = getMergeCas(aBModel, sourceDocument, jCases, randomAnnotationDocument, false);
        String firstUser = jCases.keySet().iterator().next();
        updateSegment(aBModel, segmentBeginEnd, segmentNumber, segmentAdress, jCases.get(firstUser), firstUser, aBModel.getWindowBeginOffset(), aBModel.getWindowEndOffset());
    } else {
        // Curation: the segments span from the first sentence to the end of the document text.
        jCases = listJcasesforCuration(finishedAnnotationDocuments, randomAnnotationDocument, aBModel.getMode());
        curationCas = getMergeCas(aBModel, sourceDocument, jCases, randomAnnotationDocument, false);
        updateSegment(aBModel, segmentBeginEnd, segmentNumber, segmentAdress, curationCas, WebAnnoConst.CURATION_USER, WebAnnoCasUtil.getFirstSentence(curationCas).getBegin(), curationCas.getDocumentText().length());
    }

    // (Re-)register the sentence addresses of the curation user for the current diff range.
    Map<Integer, Integer> curatorSentenceAddresses = new HashMap<>();
    segmentAdress.put(WebAnnoConst.CURATION_USER, curatorSentenceAddresses);
    for (Sentence sentence : selectCovered(curationCas, Sentence.class, diffRangeBegin, diffRangeEnd)) {
        curatorSentenceAddresses.put(sentence.getBegin(), getAddr(sentence));
    }

    List<Type> entryTypes = getEntryTypes(curationCas, aBModel.getAnnotationLayers(), annotationService);

    // for cross-sentences annotation, update the end of the segment (only on the first load)
    if (firstload) {
        long crossSentenceStart = System.currentTimeMillis();
        log.debug("Updating cross sentence annotation list...");
        updateCrossSentAnnoList(segmentBeginEnd, segmentNumber, jCases, entryTypes);
        firstload = false;
        log.debug("Cross sentence annotation list complete in {}ms", (System.currentTimeMillis() - crossSentenceStart));
    }

    long diffStart = System.currentTimeMillis();
    log.debug("Calculating differences...");
    int processed = 0;
    for (Map.Entry<Integer, Integer> segment : segmentBeginEnd.entrySet()) {
        Integer begin = segment.getKey();
        Integer end = segment.getValue();

        // Emit a progress message every 100 segments.
        processed++;
        if (processed % 100 == 0) {
            log.debug("Processing differences: {} of {} sentences...", processed, segmentBeginEnd.size());
        }

        DiffResult diff = CasDiff2.doDiffSingle(annotationService, aBModel.getProject(), entryTypes, LinkCompareBehavior.LINK_ROLE_AS_LABEL, jCases, begin, end);

        SourceListView view = new SourceListView();
        view.setBegin(begin);
        view.setEnd(end);
        view.setSentenceNumber(segmentNumber.get(begin));
        view.setSentenceState(indicatesDisagreement(diff) ? SentenceState.DISAGREE : SentenceState.AGREE);

        // Copy the address of this segment's sentence for every known user.
        for (Map.Entry<String, Map<Integer, Integer>> addressesOfUser : segmentAdress.entrySet()) {
            view.getSentenceAddress().put(addressesOfUser.getKey(), addressesOfUser.getValue().get(begin));
        }
        container.getCurationViewByBegin().put(begin, view);
    }
    log.debug("Difference calculation completed in {}ms", (System.currentTimeMillis() - diffStart));
    return container;
}

/**
 * Decides whether the diff of a segment is presented as a disagreement.
 * <p>
 * A diff without differences and without incomplete configuration sets is an agreement.
 * Otherwise it is a disagreement if some differing configuration set contains a configuration
 * whose number of CAS group IDs differs from that of the set itself (the original code refers to
 * this as a diff due to stacked annotations), or if there are incomplete configuration sets.
 *
 * @param aDiff
 *            the diff result of one segment.
 * @return {@code true} if the segment is to be marked as disagreement.
 */
private boolean indicatesDisagreement(DiffResult aDiff) {
    if (!aDiff.hasDifferences() && aDiff.getIncompleteConfigurationSets().isEmpty()) {
        return false;
    }
    for (ConfigurationSet differingSet : aDiff.getDifferingConfigurationSets().values()) {
        for (Configuration configuration : differingSet.getConfigurations()) {
            if (configuration.getCasGroupIds().size() != differingSet.getCasGroupIds().size()) {
                return true;
            }
        }
    }
    return !aDiff.getIncompleteConfigurationSets().isEmpty();
}
Also used : Configuration(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.Configuration) HashMap(java.util.HashMap) SourceDocument(de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) AnnotationDocument(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationDocument) ConfigurationSet(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.ConfigurationSet) Type(org.apache.uima.cas.Type) DiffResult(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.DiffResult) HashMap(java.util.HashMap) Map(java.util.Map) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)

Example 43 with CasDiff2

use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2 in project webanno by webanno.

the class TwoPairedKappaTest method testThreeUserDiffArcAndSpanAnnotation.

/**
 * Diffs the POS and Dependency annotations of three users and prints the pairwise Cohen's kappa
 * agreement on the {@code DependencyType} feature of the Dependency layer.
 * <p>
 * NOTE(review): this test only prints the diff and the agreement values; it contains no
 * assertions, so it can fail only if one of the invoked methods throws.
 *
 * @throws Exception
 *             if diffing or the agreement calculation fails.
 */
@Test
public void testThreeUserDiffArcAndSpanAnnotation() throws Exception {
    Map<User, JCas> userCases = new HashMap<>();
    userCases.put(user1, kappatestCas.getJCas());
    userCases.put(user2, kappaspandiff.getJCas());
    userCases.put(user3, kappaspanarcdiff.getJCas());

    // One diff over all three users; the pairwise agreements below are derived from it.
    DiffResult diff = CasDiff2.doDiff(asList(POS.class.getName(), Dependency.class.getName()), asList(SpanDiffAdapter.POS, ArcDiffAdapter.DEPENDENCY), LinkCompareBehavior.LINK_TARGET_AS_LABEL, convert(userCases));

    // Each pair works on a fresh conversion with the third user's entry removed.
    Map<String, List<JCas>> user1and2 = convert(userCases);
    user1and2.remove("user3");
    AgreementResult agreement12 = AgreementUtils.getCohenKappaAgreement(diff, Dependency.class.getName(), "DependencyType", user1and2);

    Map<String, List<JCas>> user2and3 = convert(userCases);
    user2and3.remove("user1");
    AgreementResult agreement23 = AgreementUtils.getCohenKappaAgreement(diff, Dependency.class.getName(), "DependencyType", user2and3);

    Map<String, List<JCas>> user1and3 = convert(userCases);
    user1and3.remove("user2");
    AgreementResult agreement13 = AgreementUtils.getCohenKappaAgreement(diff, Dependency.class.getName(), "DependencyType", user1and3);

    // Output only - see the review note in the method documentation.
    diff.print(System.out);
    System.out.printf("New agreement 1/2: %s%n", agreement12.toString());
    System.out.printf("New agreement 2/3: %s%n", agreement23.toString());
    System.out.printf("New agreement 1/3: %s%n", agreement13.toString());
}
Also used : AgreementResult(de.tudarmstadt.ukp.clarin.webanno.curation.agreement.AgreementUtils.AgreementResult) User(de.tudarmstadt.ukp.clarin.webanno.security.model.User) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) SourceDocument(de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument) JCas(org.apache.uima.jcas.JCas) Dependency(de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency) Arrays.asList(java.util.Arrays.asList) List(java.util.List) DiffResult(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.DiffResult) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) Test(org.junit.Test)

Example 44 with CasDiff2

use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2 in project webanno by webanno.

the class TwoPairedKappaTest method testTwoUserDiffArcAnnotation.

/**
 * Diffs the Dependency annotations of two users and checks both the resulting Cohen's kappa
 * agreement on the {@code DependencyType} feature and the size of the diff.
 *
 * @throws Exception
 *             if diffing or the agreement calculation fails.
 */
@Test
public void testTwoUserDiffArcAnnotation() throws Exception {
    Map<User, JCas> userCases = new HashMap<>();
    userCases.put(user1, kappatestCas.getJCas());
    userCases.put(user2, kappaarcdiff.getJCas());

    // Compare dependency relations via their "Dependent"/"Governor" ends and "DependencyType" label.
    DiffResult diff = CasDiff2.doDiff(Dependency.class, new ArcDiffAdapter(Dependency.class, "Dependent", "Governor", "DependencyType"), LinkCompareBehavior.LINK_TARGET_AS_LABEL, convert(userCases));
    AgreementResult agreement = AgreementUtils.getCohenKappaAgreement(diff, Dependency.class.getName(), "DependencyType", convert(userCases));

    // Print before asserting so the details are visible when an assertion fails.
    System.out.printf("Agreement: %s%n", agreement.toString());
    diff.print(System.out);

    assertEquals(0.86153d, agreement.getAgreement(), 0.00001d);
    assertEquals(9, diff.size());
    assertEquals(1, diff.getDifferingConfigurationSets().size());
    assertEquals(0, diff.getIncompleteConfigurationSets().size());
}
Also used : AgreementResult(de.tudarmstadt.ukp.clarin.webanno.curation.agreement.AgreementUtils.AgreementResult) User(de.tudarmstadt.ukp.clarin.webanno.security.model.User) ArcDiffAdapter(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.ArcDiffAdapter) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) SourceDocument(de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument) JCas(org.apache.uima.jcas.JCas) Dependency(de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency) Arrays.asList(java.util.Arrays.asList) List(java.util.List) DiffResult(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.DiffResult) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) Test(org.junit.Test)

Example 45 with CasDiff2

use of de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2 in project webanno by webanno.

the class CasDiff2Test method twoEmptyCasTest.

/**
 * Diffs two CASes with empty document text on the Lemma layer and expects an empty diff, a
 * {@code NaN} Cohen's kappa agreement and no incomplete sets.
 *
 * @throws Exception
 *             if CAS creation, diffing or the agreement calculation fails.
 */
@Test
public void twoEmptyCasTest() throws Exception {
    String emptyText = "";

    JCas firstCas = JCasFactory.createJCas();
    firstCas.setDocumentText(emptyText);
    JCas secondCas = JCasFactory.createJCas();
    secondCas.setDocumentText(emptyText);

    Map<String, List<JCas>> casesPerUser = new LinkedHashMap<>();
    casesPerUser.put("user1", asList(firstCas));
    casesPerUser.put("user2", asList(secondCas));

    // The Lemma type name is both the only entry type and the type handled by the adapter.
    String lemmaTypeName = Lemma.class.getName();
    List<String> typesToCompare = asList(lemmaTypeName);
    List<SpanDiffAdapter> adapters = asList(new SpanDiffAdapter(lemmaTypeName));

    DiffResult diff = CasDiff2.doDiff(typesToCompare, adapters, LinkCompareBehavior.LINK_TARGET_AS_LABEL, casesPerUser);
    diff.print(System.out);

    // Nothing was annotated, so the diff must be empty.
    assertEquals(0, diff.size());
    assertEquals(0, diff.getDifferingConfigurationSets().size());
    assertEquals(0, diff.getIncompleteConfigurationSets().size());

    AgreementResult agreement = AgreementUtils.getCohenKappaAgreement(diff, typesToCompare.get(0), "value", casesPerUser);
    assertEquals(Double.NaN, agreement.getAgreement(), 0.000001d);
    assertEquals(0, agreement.getIncompleteSetsByPosition().size());
}
Also used : AgreementResult(de.tudarmstadt.ukp.clarin.webanno.curation.agreement.AgreementUtils.AgreementResult) Lemma(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma) SpanDiffAdapter(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.SpanDiffAdapter) JCas(org.apache.uima.jcas.JCas) ArrayList(java.util.ArrayList) Arrays.asList(java.util.Arrays.asList) List(java.util.List) DiffResult(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.DiffResult) LinkedHashMap(java.util.LinkedHashMap) Test(org.junit.Test)

Aggregations

DiffResult (de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.DiffResult)52 List (java.util.List)52 Arrays.asList (java.util.Arrays.asList)51 Test (org.junit.Test)51 ArrayList (java.util.ArrayList)48 JCas (org.apache.uima.jcas.JCas)44 SpanDiffAdapter (de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.SpanDiffAdapter)37 LinkedHashMap (java.util.LinkedHashMap)26 AgreementResult (de.tudarmstadt.ukp.clarin.webanno.curation.agreement.AgreementUtils.AgreementResult)24 POS (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS)20 ArcDiffAdapter (de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.ArcDiffAdapter)12 Type (org.apache.uima.cas.Type)12 Dependency (de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency)11 HashMap (java.util.HashMap)8 Map (java.util.Map)8 SourceDocument (de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument)6 User (de.tudarmstadt.ukp.clarin.webanno.security.model.User)5 FeatureStructure (org.apache.uima.cas.FeatureStructure)5 TypeSystemDescription (org.apache.uima.resource.metadata.TypeSystemDescription)5 NamedEntity (de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity)4