Search in sources :

Example 1 with UnitizingAnnotationStudy

Use of org.dkpro.statistics.agreement.unitizing.UnitizingAnnotationStudy in project webanno by webanno.

The class KrippendorffAlphaUnitizingAgreementMeasure, method calculatePairAgreement.

/**
 * Builds a unitizing study from the given raters' CASes and computes the Krippendorff's alpha
 * unitizing agreement over it.
 * <p>
 * The documents of each rater are laid out back-to-back on a single character continuum: the
 * document at list position {@code i} starts at the sum of the sizes of the documents at
 * positions {@code 0..i-1}. Every annotation of the configured layer type is added to the study
 * as one unit, using its offset on that continuum, its length, and the value of the configured
 * feature as the category.
 *
 * @param aCasMap
 *            maps each rater's key to that rater's CASes. The lists are read positionally, so
 *            the same index is treated as the same document for every rater. {@code null}
 *            entries are skipped. NOTE(review): the size table is dimensioned from the first
 *            rater's list, so all lists are assumed to have that same length - a longer list
 *            would index past the table; confirm against callers.
 * @return the result wrapping the study; its agreement is {@link Double#NaN} when the study
 *         contains no units, otherwise the value computed by the agreement measure.
 * @throws java.util.NoSuchElementException
 *             if {@code aCasMap} contains no entries.
 */
public UnitizingAgreementResult calculatePairAgreement(Map<String, List<CAS>> aCasMap) {
    String typeName = getFeature().getLayer().getName();
    String featureName = getFeature().getName();
    // Calculate a character offset continuum over all CASses. We assume here that the documents
    // all have the same size - since the users cannot change the document sizes, this should be
    // a universally true assumption.
    // Fail with an explicit message instead of a bare Optional.get() when there are no raters.
    List<CAS> firstUserCasses = aCasMap.values().stream().findFirst()
            .orElseThrow(() -> new java.util.NoSuchElementException(
                    "Cannot calculate agreement: no annotator CASes were provided"));
    int docCount = firstUserCasses.size();
    // A freshly allocated int[] is already zero-filled; 0 marks "size not seen yet".
    int[] docSizes = new int[docCount];
    for (Entry<String, List<CAS>> set : aCasMap.entrySet()) {
        int i = 0;
        for (CAS cas : set.getValue()) {
            if (cas != null) {
                int docLength = cas.getDocumentText().length();
                // Every rater's copy of a document must have the same length.
                assert docSizes[i] == 0 || docSizes[i] == docLength;
                docSizes[i] = docLength;
            }
            i++;
        }
    }
    int continuumSize = Arrays.stream(docSizes).sum();
    // Create a unitizing study for that continuum.
    UnitizingAnnotationStudy study = new UnitizingAnnotationStudy(continuumSize);
    // For each rater, take the feature values from all of the rater's CASes and add
    // them to the unitizing study based on character offsets.
    for (Entry<String, List<CAS>> set : aCasMap.entrySet()) {
        int raterIdx = study.addRater(set.getKey());
        int docOffset = 0;
        int i = 0;
        for (CAS cas : set.getValue()) {
            // A null entry contributes no units (presumably the rater never worked on that
            // document - confirm against the caller), so we skip it.
            if (cas != null) {
                Type t = cas.getTypeSystem().getType(typeName);
                Feature f = t.getFeatureByBaseName(featureName);
                // The lambda needs an effectively final copy of the running offset.
                int currentDocOffset = docOffset;
                cas.select(t).map(fs -> (AnnotationFS) fs).forEach(fs -> {
                    study.addUnit(currentDocOffset + fs.getBegin(), fs.getEnd() - fs.getBegin(),
                            raterIdx, FSUtil.getFeature(fs, f, Object.class));
                });
            }
            // Advance even for skipped documents so later documents keep their position on
            // the continuum.
            docOffset += docSizes[i];
            i++;
        }
    }
    UnitizingAgreementResult result = new UnitizingAgreementResult(typeName, featureName, study,
            new ArrayList<>(aCasMap.keySet()), getTraits().isExcludeIncomplete());
    IAgreementMeasure agreement = new KrippendorffAlphaUnitizingAgreement(study);
    // Only run the measure when there is at least one unit; otherwise report NaN.
    if (result.getStudy().getUnitCount() > 0) {
        result.setAgreement(agreement.calculateAgreement());
    } else {
        result.setAgreement(Double.NaN);
    }
    return result;
}
Also used : UnitizingAnnotationStudy(org.dkpro.statistics.agreement.unitizing.UnitizingAnnotationStudy) AgreementMeasure_ImplBase(de.tudarmstadt.ukp.clarin.webanno.agreement.measures.AgreementMeasure_ImplBase) Arrays(java.util.Arrays) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) CAS(org.apache.uima.cas.CAS) Feature(org.apache.uima.cas.Feature) UnitizingAnnotationStudy(org.dkpro.statistics.agreement.unitizing.UnitizingAnnotationStudy) KrippendorffAlphaUnitizingAgreement(org.dkpro.statistics.agreement.unitizing.KrippendorffAlphaUnitizingAgreement) FSUtil(org.apache.uima.fit.util.FSUtil) ArrayList(java.util.ArrayList) Type(org.apache.uima.cas.Type) LinkedHashMap(java.util.LinkedHashMap) AnnotationSchemaService(de.tudarmstadt.ukp.clarin.webanno.api.AnnotationSchemaService) List(java.util.List) AnnotationFeature(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature) Map(java.util.Map) Entry(java.util.Map.Entry) IAgreementMeasure(org.dkpro.statistics.agreement.IAgreementMeasure) UnitizingAgreementResult(de.tudarmstadt.ukp.clarin.webanno.agreement.results.unitizing.UnitizingAgreementResult) PairwiseAnnotationResult(de.tudarmstadt.ukp.clarin.webanno.agreement.PairwiseAnnotationResult) UnitizingAgreementResult(de.tudarmstadt.ukp.clarin.webanno.agreement.results.unitizing.UnitizingAgreementResult) IAgreementMeasure(org.dkpro.statistics.agreement.IAgreementMeasure) KrippendorffAlphaUnitizingAgreement(org.dkpro.statistics.agreement.unitizing.KrippendorffAlphaUnitizingAgreement) Feature(org.apache.uima.cas.Feature) AnnotationFeature(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) CAS(org.apache.uima.cas.CAS) ArrayList(java.util.ArrayList) List(java.util.List)

Aggregations

PairwiseAnnotationResult (de.tudarmstadt.ukp.clarin.webanno.agreement.PairwiseAnnotationResult)1 AgreementMeasure_ImplBase (de.tudarmstadt.ukp.clarin.webanno.agreement.measures.AgreementMeasure_ImplBase)1 UnitizingAgreementResult (de.tudarmstadt.ukp.clarin.webanno.agreement.results.unitizing.UnitizingAgreementResult)1 AnnotationSchemaService (de.tudarmstadt.ukp.clarin.webanno.api.AnnotationSchemaService)1 AnnotationFeature (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature)1 ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 LinkedHashMap (java.util.LinkedHashMap)1 List (java.util.List)1 Map (java.util.Map)1 Entry (java.util.Map.Entry)1 CAS (org.apache.uima.cas.CAS)1 Feature (org.apache.uima.cas.Feature)1 Type (org.apache.uima.cas.Type)1 AnnotationFS (org.apache.uima.cas.text.AnnotationFS)1 FSUtil (org.apache.uima.fit.util.FSUtil)1 IAgreementMeasure (org.dkpro.statistics.agreement.IAgreementMeasure)1 KrippendorffAlphaUnitizingAgreement (org.dkpro.statistics.agreement.unitizing.KrippendorffAlphaUnitizingAgreement)1 UnitizingAnnotationStudy (org.dkpro.statistics.agreement.unitizing.UnitizingAnnotationStudy)1