Search in sources :

Example 6 with AnnotationSet

use of gate.AnnotationSet in project gate-core by GateNLP.

the class AnnotationMerging method mergeAnnotation.

/**
 * Merges the annotation sets of several annotators. An annotation is put
 * into {@code mergeAnns} when a compatible annotation (compared via
 * {@code isCompatible} restricted to the feature {@code nameFeat}, if given)
 * is found in at least {@code numMinK} of the annotation sets.
 *
 * @param annsArr one annotation set per annotator; individual entries may be
 *          {@code null} and are then skipped
 * @param nameFeat name of the feature used for the compatibility check, or
 *          {@code null} to compare with an empty feature-name set
 * @param mergeAnns output map; each merged annotation is mapped to the
 *          indices of the agreeing annotators joined by {@code '-'}
 *          (for example {@code "0-2-3"})
 * @param numMinK minimal number of agreeing annotators; values below 1 are
 *          treated as 1
 * @param isTheSameInstances if {@code true} (and {@code nameFeat} is not
 *          {@code null}), annotations that do not reach {@code numMinK} are
 *          still added to {@code mergeAnns}, but with the feature
 *          {@code nameFeat} removed from the annotation itself
 */
public static void mergeAnnotation(AnnotationSet[] annsArr, String nameFeat, HashMap<Annotation, String> mergeAnns, int numMinK, boolean isTheSameInstances) {
    final int numA = annsArr.length;
    // Work on copies of the annotation sets: matched annotations are removed
    // from the copies below so that they are not considered a second time.
    @SuppressWarnings({ "unchecked", "rawtypes" }) Set<Annotation>[] annsArrTemp = new Set[numA];
    for (int i = 0; i < numA; ++i) {
        if (annsArr[i] != null) {
            annsArrTemp[i] = new HashSet<Annotation>();
            for (Annotation ann : annsArr[i]) annsArrTemp[i].add(ann);
        }
    }
    final HashSet<String> featSet = new HashSet<String>();
    if (nameFeat != null) {
        featSet.add(nameFeat);
    }
    final int minAgreed = (numMinK < 1) ? 1 : numMinK;
    // An annotation first seen in set iA can be matched by at most
    // (numA - iA) annotators, so later start sets cannot reach minAgreed.
    for (int iA = 0; iA < numA - minAgreed + 1; ++iA) {
        if (annsArrTemp[iA] == null) {
            continue;
        }
        for (Annotation ann : annsArrTemp[iA]) {
            int numContained = 1;
            // Indices of the annotators that agree on this annotation.
            StringBuilder featAdd = new StringBuilder();
            featAdd.append(iA);
            for (int i = iA + 1; i < numA; ++i) {
                if (annsArrTemp[i] == null) {
                    continue;
                }
                // Take the first compatible annotation of annotator i, if any.
                Annotation annT = null;
                for (Annotation ann0 : annsArrTemp[i]) {
                    if (ann0.isCompatible(ann, featSet)) {
                        annT = ann0;
                        break;
                    }
                }
                if (annT != null) {
                    ++numContained;
                    featAdd.append('-').append(i);
                    // Consume the match so it is not merged again later.
                    annsArrTemp[i].remove(annT);
                }
            }
            if (numContained >= minAgreed) {
                mergeAnns.put(ann, featAdd.toString());
            } else if (isTheSameInstances && nameFeat != null) {
                // Not enough agreement: keep the instance but drop its label.
                ann.getFeatures().remove(nameFeat);
                mergeAnns.put(ann, featAdd.toString());
            }
        }
    }
    // Remove the annotation in the same place
    removeDuplicate(mergeAnns);
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) AnnotationSet(gate.AnnotationSet) Annotation(gate.Annotation)

Example 7 with AnnotationSet

use of gate.AnnotationSet in project gate-core by GateNLP.

the class AnnotationMerging method mergeAnnotationMajority.

/**
 * Merges the annotation sets of several annotators by majority vote on the
 * value of the feature {@code nameFeat}. Annotations of different annotators
 * are grouped when they are coextensive; within a group the feature value
 * shared by the most annotators wins.
 * <p>
 * If {@code nameFeat} is {@code null} the work is delegated to
 * {@code mergeAnnogationMajorityNoFeat}.
 *
 * @param annsArr one annotation set per annotator; individual entries may be
 *          {@code null} and are then skipped
 * @param nameFeat name of the feature whose value is voted on
 * @param mergeAnns output map; a winning annotation is mapped to the indices
 *          of the annotators sharing its feature value, joined by {@code '-'}
 * @param isTheSameInstances if {@code true}, an annotation without sufficient
 *          agreement is still added to {@code mergeAnns}: the feature
 *          {@code nameFeat} is removed from it and it is mapped to the
 *          indices of the disagreeing annotators
 */
public static void mergeAnnotationMajority(AnnotationSet[] annsArr, String nameFeat, HashMap<Annotation, String> mergeAnns, boolean isTheSameInstances) {
    final int numA = annsArr.length;
    if (nameFeat == null) {
        mergeAnnogationMajorityNoFeat(annsArr, mergeAnns, isTheSameInstances);
        return;
    }
    // Work on copies of the annotation sets: matched annotations are removed
    // from the copies below so that they are not considered a second time.
    @SuppressWarnings({ "unchecked", "rawtypes" }) Set<Annotation>[] annsArrTemp = new Set[numA];
    for (int i = 0; i < numA; ++i) {
        if (annsArr[i] != null) {
            annsArrTemp[i] = new HashSet<Annotation>();
            for (Annotation ann : annsArr[i]) annsArrTemp[i].add(ann);
        }
    }
    for (int iA = 0; iA < numA; ++iA) {
        if (annsArrTemp[iA] == null) {
            continue;
        }
        for (Annotation ann : annsArrTemp[iA]) {
            // The iA annotators before this one are counted as disagreeing:
            // an annotation still present here was not matched by any of them.
            int numDisagreed = iA;
            StringBuilder featDisa = new StringBuilder();
            if (iA > 0) {
                featDisa.append("0");
                for (int i = 1; i < iA; ++i) featDisa.append('-').append(i);
            }
            // feature value -> '-'-joined indices of annotators using that value
            HashMap<String, String> featOthers = new HashMap<String, String>();
            // feature value -> first annotation seen carrying that value
            HashMap<String, Annotation> annAll = new HashMap<String, Annotation>();
            Object ownValue = ann.getFeatures().get(nameFeat);
            String featTh = (ownValue != null) ? ownValue.toString() : null;
            featOthers.put(featTh, Integer.toString(iA));
            annAll.put(featTh, ann);
            for (int i = iA + 1; i < numA; ++i) {
                boolean isContained = false;
                if (annsArrTemp[i] != null) {
                    Annotation annT = null;
                    for (Annotation ann0 : annsArrTemp[i]) {
                        if (ann0.coextensive(ann)) {
                            Object otherValue = ann0.getFeatures().get(nameFeat);
                            String featValue = (otherValue != null) ? otherValue.toString() : null;
                            if (!featOthers.containsKey(featValue)) {
                                featOthers.put(featValue, Integer.toString(i));
                                annAll.put(featValue, ann0);
                            } else {
                                featOthers.put(featValue, featOthers.get(featValue) + "-" + i);
                            }
                            annT = ann0;
                            isContained = true;
                            break;
                        }
                    }
                    if (isContained) {
                        // Consume the match so it is not merged again later.
                        annsArrTemp[i].remove(annT);
                    }
                }
                if (!isContained) {
                    if (numDisagreed > 0) {
                        featDisa.append('-');
                    }
                    featDisa.append(i);
                    ++numDisagreed;
                }
            }
            // Find the feature value supported by the most annotators; the
            // number of supporters is the number of '-'-separated indices.
            int numAgreed = -1;
            String agreeFeat = null;
            for (String str : featOthers.keySet()) {
                String str0 = featOthers.get(str);
                int num = 1;
                while (str0.contains("-")) {
                    ++num;
                    str0 = str0.substring(str0.indexOf('-') + 1);
                }
                if (numAgreed < num) {
                    numAgreed = num;
                    agreeFeat = str;
                }
            }
            if (numAgreed >= numDisagreed) {
                mergeAnns.put(annAll.get(agreeFeat), featOthers.get(agreeFeat));
            } else if (isTheSameInstances) {
                // Not enough agreement: keep the instance but drop its label.
                if (ann.getFeatures().get(nameFeat) != null) {
                    ann.getFeatures().remove(nameFeat);
                }
                mergeAnns.put(ann, featDisa.toString());
            }
        }
    }
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) AnnotationSet(gate.AnnotationSet) HashMap(java.util.HashMap) Annotation(gate.Annotation)

Example 8 with AnnotationSet

use of gate.AnnotationSet in project gate-core by GateNLP.

the class CorpusBenchmarkTool method measureDocs.

// calculateAvgTotal
/**
 * Compares the annotations of type {@code annotType} in a key document with
 * those in a response document.
 * <p>
 * Keys are read from the annotation set named by {@code annotSetName} and
 * responses from the set named by {@code outputSetName}; when
 * {@code annotSetName} is {@code null} or empty, the default annotation set
 * of both documents is used instead.
 *
 * @param keyDoc the document holding the key annotations
 * @param respDoc the document holding the response annotations
 * @param annotType the annotation type to compare
 * @return the differ after {@code calculateDiff} has been run, or
 *         {@code null} if either document is {@code null} or the key
 *         lookup for {@code annotType} yields {@code null}
 */
protected AnnotationDiffer measureDocs(Document keyDoc, Document respDoc, String annotType) throws ResourceInstantiationException {
    if (keyDoc == null || respDoc == null) {
        return null;
    }
    final boolean useDefaultSet = annotSetName == null || annotSetName.equals("");
    // Nothing to measure when the key document has no such annotations.
    if (annotSetName != null && keyDoc.getAnnotations(annotSetName).get(annotType) == null) {
        return null;
    }
    if (useDefaultSet && keyDoc.getAnnotations().get(annotType) == null) {
        return null;
    }
    // Configure the differ with the significant feature names.
    final AnnotationDiffer differ = new AnnotationDiffer();
    differ.setSignificantFeaturesSet(diffFeaturesSet);
    // Pick the key and response sets to compare.
    final AnnotationSet keys = useDefaultSet
            ? keyDoc.getAnnotations().get(annotType)
            : keyDoc.getAnnotations(annotSetName).get(annotType);
    final AnnotationSet responses = useDefaultSet
            ? respDoc.getAnnotations().get(annotType)
            : respDoc.getAnnotations(outputSetName).get(annotType);
    differ.calculateDiff(keys, responses);
    return differ;
}
Also used : AnnotationSet(gate.AnnotationSet)

Example 9 with AnnotationSet

use of gate.AnnotationSet in project gate-core by GateNLP.

the class TestAnnotationMerging method testWithfeat.

/**
 * Runs one merging scenario and checks the resulting annotation count.
 * <p>
 * Collects, for every document of {@code data}, the annotations of type
 * {@code nameAnnType} from each of the annotation sets listed in
 * {@code nameAnnSets} (separated by {@code ';'}), merges the sets of the
 * first document and passes the number of merged annotations to
 * {@code checkNumbers}.
 *
 * @param nameAnnSets names of the annotators' annotation sets, separated by
 *          {@code ';'}
 * @param nameAnnType the annotation type to merge
 * @param nameAnnFeat the annotation feature used for merging
 * @param data the corpus to read the annotations from
 * @param isUsingMajority {@code true} to use majority merging, {@code false}
 *          to require agreement of at least 2 annotators
 */
public void testWithfeat(String nameAnnSets, String nameAnnType, String nameAnnFeat, Corpus data, boolean isUsingMajority) {
    // One row per document, one column per annotator.
    final String[] setNames = nameAnnSets.split(";");
    final int judgeCount = setNames.length;
    final int docCount = data.size();
    final AnnotationSet[][] setsPerDoc = new AnnotationSet[docCount][judgeCount];
    for (int docIdx = 0; docIdx < docCount; docIdx++) {
        final Document current = data.get(docIdx);
        for (int judge = 0; judge < judgeCount; judge++) {
            setsPerDoc[docIdx][judge] = current.getAnnotations(setNames[judge]).get(nameAnnType);
        }
    }
    // The flag stays true only if the check passes for every document.
    boolean sameInstances = true;
    for (AnnotationSet[] judgeSets : setsPerDoc) {
        if (!AnnotationMerging.isSameInstancesForAnnotators(judgeSets, 1)) {
            sameInstances = false;
            break;
        }
    }
    // Merge the annotation sets of the first document only.
    final HashMap<Annotation, String> merged = new HashMap<Annotation, String>();
    if (!isUsingMajority) {
        AnnotationMerging.mergeAnnotation(setsPerDoc[0], nameAnnFeat, merged, 2, sameInstances);
    } else {
        AnnotationMerging.mergeAnnotationMajority(setsPerDoc[0], nameAnnFeat, merged, sameInstances);
    }
    int counted;
    if (!sameInstances) {
        counted = merged.size();
    } else {
        // Count only the merged annotations that still carry the feature.
        counted = 0;
        for (Annotation mergedAnn : merged.keySet()) {
            if (mergedAnn.getFeatures().get(nameAnnFeat) != null) {
                counted++;
            }
        }
    }
    checkNumbers(counted);
}
Also used : HashMap(java.util.HashMap) AnnotationSet(gate.AnnotationSet) Document(gate.Document) TestDocument(gate.corpora.TestDocument) Annotation(gate.Annotation)

Example 10 with AnnotationSet

use of gate.AnnotationSet in project gate-core by GateNLP.

the class TestDiffer method testDiffer.

// tearDown
/**
 * Checks the strict and lenient precision/recall values reported by
 * {@code AnnotationDiffer} for three scenarios: identical key and response
 * lists, one additional response annotation (lower precision) and one
 * additional key annotation (lower recall).
 */
public void testDiffer() throws Exception {
    Document doc = Factory.newDocument(new URL(gate.corpora.TestDocument.getTestServerName() + "tests/ft-bt-03-aug-2001.html"), "windows-1252");
    AnnotationSet annSet = doc.getAnnotations();
    // create 100 annotations
    FeatureMap features = Factory.newFeatureMap();
    features.put("type", "BAR");
    for (int i = 0; i < 100; i++) {
        // Long.valueOf replaces the deprecated Long(long) constructor
        annSet.add(Long.valueOf(i * 10L), Long.valueOf((i + 1) * 10L), "Foo", features);
    }
    List<Annotation> keySet = new ArrayList<Annotation>(annSet);
    List<Annotation> responseSet = new ArrayList<Annotation>(annSet);
    // check 100% Precision and recall
    AnnotationDiffer differ = new AnnotationDiffer();
    differ.setSignificantFeaturesSet(null);
    differ.calculateDiff(keySet, responseSet);
    differ.sanityCheck();
    if (DEBUG)
        differ.printMissmatches();
    double value = differ.getPrecisionStrict();
    Assert.assertEquals("Precision Strict: " + value + " instead of 1!", 1, value, 0);
    value = differ.getRecallStrict();
    Assert.assertEquals("Recall Strict: " + value + " instead of 1!", 1, value, 0);
    value = differ.getPrecisionLenient();
    Assert.assertEquals("Precision Lenient: " + value + " instead of 1!", 1, value, 0);
    value = differ.getRecallLenient();
    Assert.assertEquals("Recall Lenient: " + value + " instead of 1!", 1, value, 0);
    // check low precision: one extra annotation that is only in the responses
    Integer id = annSet.add(Long.valueOf(2L), Long.valueOf(4L), "Foo", features);
    Annotation falsePositive = annSet.get(id);
    responseSet.add(falsePositive);
    differ.calculateDiff(keySet, responseSet);
    differ.sanityCheck();
    if (DEBUG)
        differ.printMissmatches();
    value = differ.getPrecisionStrict();
    Assert.assertEquals("Precision Strict: " + value + " instead of .99!", .99, value, .001);
    // recall should still be 100%
    value = differ.getRecallStrict();
    Assert.assertEquals("Recall Strict: " + value + " instead of 1!", 1, value, 0);
    value = differ.getRecallLenient();
    Assert.assertEquals("Recall Lenient: " + value + " instead of 1!", 1, value, 0);
    // check low recall: move the extra annotation from the responses to the keys
    responseSet.remove(falsePositive);
    keySet.add(falsePositive);
    differ.calculateDiff(keySet, responseSet);
    differ.sanityCheck();
    if (DEBUG)
        differ.printMissmatches();
    value = differ.getRecallStrict();
    Assert.assertEquals("Recall Strict: " + value + " instead of .99!", .99, value, .001);
    // precision should still be 100%
    value = differ.getPrecisionStrict();
    Assert.assertEquals("Precision Strict: " + value + " instead of 1!", 1, value, 0);
    value = differ.getPrecisionLenient();
    Assert.assertEquals("Precision Lenient: " + value + " instead of 1!", 1, value, 0);
}
Also used : FeatureMap(gate.FeatureMap) ArrayList(java.util.ArrayList) AnnotationSet(gate.AnnotationSet) Document(gate.Document) URL(java.net.URL) Annotation(gate.Annotation)

Aggregations

AnnotationSet (gate.AnnotationSet)43 Annotation (gate.Annotation)27 ArrayList (java.util.ArrayList)14 HashMap (java.util.HashMap)11 HashSet (java.util.HashSet)11 Document (gate.Document)9 List (java.util.List)8 FeatureMap (gate.FeatureMap)7 InvalidOffsetException (gate.util.InvalidOffsetException)6 AnnotationSetImpl (gate.annotation.AnnotationSetImpl)5 Set (java.util.Set)5 StatusListener (gate.event.StatusListener)4 GateRuntimeException (gate.util.GateRuntimeException)4 Point (java.awt.Point)4 IOException (java.io.IOException)4 URL (java.net.URL)4 Map (java.util.Map)4 Color (java.awt.Color)3 TreeSet (java.util.TreeSet)3 TestDocument (gate.corpora.TestDocument)2