Search in sources :

Example 61 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class AnnotationMerging method mergeAnnogationMajorityNoFeat.

/**
 * The majority merging method for annotations when no annotation feature is
 * specified as the label: annotations from different annotators are taken to
 * agree when they are coextensive.
 * <p>
 * Every annotation is voted on once. An annotator agrees with an annotation
 * if its set still holds a coextensive annotation; that matching annotation is
 * then removed from the working copy so that it is not voted on again. An
 * annotation is selected when at least as many annotators agree as disagree
 * (ties are kept).
 *
 * @param annsArr one annotation set per annotator; a <code>null</code> entry
 *          counts as an annotator that disagrees with everything. The sets
 *          themselves are not modified.
 * @param mergeAnns output map; each selected annotation is put here, mapped
 *          to the indices of the agreeing annotators joined by "-"
 *          (for example "0-2").
 * @param isTheSameInstances if <code>true</code>, every annotation that is
 *          voted on is put into <code>mergeAnns</code> regardless of the vote.
 */
private static void mergeAnnogationMajorityNoFeat(AnnotationSet[] annsArr, HashMap<Annotation, String> mergeAnns, boolean isTheSameInstances) {
    int numA = annsArr.length;
    // Work on copies so matched annotations can be removed without touching
    // the caller's annotation sets.
    @SuppressWarnings({ "unchecked", "rawtypes" }) Set<Annotation>[] annsArrTemp = new Set[numA];
    for (int i = 0; i < numA; ++i) {
        if (annsArr[i] != null) {
            annsArrTemp[i] = new HashSet<Annotation>();
            for (Annotation ann : annsArr[i]) {
                annsArrTemp[i].add(ann);
            }
        }
    }
    for (int iA = 0; iA < numA; ++iA) {
        if (annsArrTemp[iA] == null) {
            continue;
        }
        for (Annotation ann : annsArrTemp[iA]) {
            // Annotations of this set that were matched by an earlier
            // annotator have already been removed from the working copy, so
            // all iA earlier annotators are counted as disagreeing.
            int numDisagreed = iA;
            // The annotator owning the annotation agrees with it.
            int numAgreed = 1;
            StringBuilder featAdd = new StringBuilder();
            featAdd.append(iA);
            for (int i = iA + 1; i < numA; ++i) {
                // Look for the first coextensive annotation of annotator i.
                Annotation match = null;
                if (annsArrTemp[i] != null) {
                    for (Annotation candidate : annsArrTemp[i]) {
                        if (candidate.coextensive(ann)) {
                            match = candidate;
                            break;
                        }
                    }
                }
                if (match != null) {
                    ++numAgreed;
                    featAdd.append('-').append(i);
                    // Consume the match so it is not voted on again later.
                    annsArrTemp[i].remove(match);
                } else {
                    ++numDisagreed;
                }
            }
            // Keep the annotation on a (non-strict) majority, or always when
            // all annotators are known to share the same instances.
            if (numAgreed >= numDisagreed || isTheSameInstances) {
                mergeAnns.put(ann, featAdd.toString());
            }
        }
    }
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) AnnotationSet(gate.AnnotationSet) Annotation(gate.Annotation)

Example 62 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class ClassificationMeasures method calculateConfusionMatrix.

/**
 * Builds the confusion matrix for two annotation sets by comparing, for
 * annotations with identical span, the value of the given feature.
 * The set of class labels (feature values) is collected while counting and
 * stored in <code>featureValues</code>; the resulting matrix is stored in
 * <code>confusionMatrix</code>.
 * <p>
 * An annotation of the first set is ignored when another annotation of the
 * first set has the same span, or when the second set holds zero or more than
 * one annotation with that span.
 *
 * @param aS1 annotation set to compare to the second
 * @param aS2 annotation set to compare to the first
 * @param type annotation type containing the features to compare
 * @param feature feature name whose values will be compared
 * @param verbose if true, a message is printed for every ignored annotation
 */
public void calculateConfusionMatrix(AnnotationSet aS1, AnnotationSet aS2, String type, String feature, boolean verbose) {
    // Class labels seen so far, kept sorted.
    featureValues = new TreeSet<String>();
    // Counts indexed by set-1 label, then by set-2 label.
    HashMap<String, HashMap<String, Float>> countMap = new HashMap<String, HashMap<String, Float>>();
    // Restrict both sets to annotations of the given type carrying the feature.
    HashSet<String> featureSet = new HashSet<String>();
    featureSet.add(feature);
    AnnotationSet relevantAnns1 = aS1.get(type, featureSet);
    AnnotationSet relevantAnns2 = aS2.get(type, featureSet);
    for (Annotation current : relevantAnns1) {
        // Collect other set-1 annotations sharing the span of the current one.
        List<Annotation> sameSpanInSet1 = new ArrayList<Annotation>();
        for (Annotation other : relevantAnns1) {
            if (!other.equals(current) && other.coextensive(current)) {
                sameSpanInSet1.add(other);
                sameSpanInSet1.add(current);
            }
        }
        if (sameSpanInSet1.size() > 1) {
            if (verbose) {
                Out.prln("ClassificationMeasures: " + "Same span annotations in set 1 detected! Ignoring.");
                Out.prln(Arrays.toString(sameSpanInSet1.toArray()));
            }
            continue;
        }
        // Collect the set-2 annotations with the same span.
        List<Annotation> counterparts = new ArrayList<Annotation>();
        for (Annotation candidate : relevantAnns2) {
            if (candidate.coextensive(current)) {
                counterparts.add(candidate);
            }
        }
        int counterpartCount = counterparts.size();
        if (counterpartCount == 0) {
            if (verbose) {
                Out.prln("ClassificationMeasures: Annotation in set 1 " + "with no counterpart in set 2 detected! Ignoring.");
                Out.prln(current.toString());
            }
            continue;
        }
        if (counterpartCount > 1) {
            if (verbose) {
                Out.prln("ClassificationMeasures: " + "Same span annotations in set 2 detected! Ignoring.");
                Out.prln(Arrays.toString(counterparts.toArray()));
            }
            continue;
        }
        // Exactly one counterpart: record the pair of labels.
        String featVal1 = String.valueOf(current.getFeatures().get(feature));
        String featVal2 = String.valueOf(counterparts.get(0).getFeatures().get(feature));
        featureValues.add(featVal1);
        featureValues.add(featVal2);
        // Fetch the row for the set-1 label, creating it on first use.
        HashMap<String, Float> row = countMap.get(featVal1);
        if (row == null) {
            row = new HashMap<String, Float>();
            countMap.put(featVal1, row);
        }
        // Start the cell at one or increment the existing count.
        Float previous = row.get(featVal2);
        if (previous == null) {
            row.put(featVal2, (float) 1);
        } else {
            row.put(featVal2, (float) previous.intValue() + 1);
        }
    }
    // The calculation code works on a float array, so translate the
    // hash of hashes into one.
    confusionMatrix = convert2DHashTo2DFloatArray(countMap, featureValues);
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) AnnotationSet(gate.AnnotationSet) Annotation(gate.Annotation) HashSet(java.util.HashSet)

Example 63 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class CorpusBenchmarkTool method countWords.

/**
 * Counts the Token annotations of the document whose "kind" feature is
 * "word" (compared case-insensitively). Tokens are read from the annotation
 * set named by <code>outputSetName</code>.
 *
 * @param annotDoc the document to inspect; may be <code>null</code>
 * @return the number of word tokens, or 0 when the document is
 *         <code>null</code> or no Token annotation set is available
 */
protected int countWords(Document annotDoc) {
    if (annotDoc == null) {
        return 0;
    }
    // check for Token in outputSetName
    AnnotationSet tokens = annotDoc.getAnnotations(outputSetName).get("Token");
    if (tokens == null) {
        return 0;
    }
    int count = 0;
    for (Annotation token : tokens) {
        Object kind = token.getFeatures().get("kind");
        // instanceof rejects both null and non-String values, so a token
        // with an unexpected "kind" value is skipped instead of raising a
        // ClassCastException.
        if (kind instanceof String && "word".equalsIgnoreCase((String) kind)) {
            ++count;
        }
    }
    return count;
}
Also used : AnnotationSet(gate.AnnotationSet) Annotation(gate.Annotation)

Example 64 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class AnnotationDiffer method calculateDiff.

/**
 * Computes a diff between two collections of annotations.
 * <p>
 * Every key/response pair that is coextensive or overlapping becomes a
 * candidate pairing with a value (correct, mismatch, partially correct or
 * wrong). The candidates are sorted with {@link PairingScoreComparator},
 * reversed, and then taken from the head of the list one at a time to build
 * the final pairing list. Keys and responses whose choice list is still
 * <code>null</code> or empty afterwards are reported as missing and spurious.
 * <p>
 * As a side effect this method resets and repopulates the result fields of
 * this object: the key/response lists, the correct, partially correct,
 * missing and spurious annotation sets, and the matching counters.
 *
 * @param key the collection of key annotations; <code>null</code> or empty
 *          is treated as "no keys".
 * @param response the collection of response annotations; <code>null</code>
 *          or empty is treated as "no responses".
 * @return a list of {@link Pairing} objects representing the pairing set
 * that results in the best score.
 */
public List<Pairing> calculateDiff(Collection<Annotation> key, Collection<Annotation> response) {
    // initialise data structures
    // copy the inputs into index-addressable lists (empty when null/empty)
    if (key == null || key.size() == 0)
        keyList = new ArrayList<Annotation>();
    else
        keyList = new ArrayList<Annotation>(key);
    if (response == null || response.size() == 0)
        responseList = new ArrayList<Annotation>();
    else
        responseList = new ArrayList<Annotation>(response);
    // reuse the result sets from a previous run when present, else create them
    if (correctAnnotations != null) {
        correctAnnotations.clear();
    } else {
        correctAnnotations = new HashSet<Annotation>();
    }
    if (partiallyCorrectAnnotations != null) {
        partiallyCorrectAnnotations.clear();
    } else {
        partiallyCorrectAnnotations = new HashSet<Annotation>();
    }
    if (missingAnnotations != null) {
        missingAnnotations.clear();
    } else {
        missingAnnotations = new HashSet<Annotation>();
    }
    if (spuriousAnnotations != null) {
        spuriousAnnotations.clear();
    } else {
        spuriousAnnotations = new HashSet<Annotation>();
    }
    // one slot per key / per response, all initially null; addPairing is
    // given the slot index (presumably it creates the list on demand - confirm)
    keyChoices = new ArrayList<List<Pairing>>(keyList.size());
    keyChoices.addAll(Collections.nCopies(keyList.size(), (List<Pairing>) null));
    responseChoices = new ArrayList<List<Pairing>>(responseList.size());
    responseChoices.addAll(Collections.nCopies(responseList.size(), (List<Pairing>) null));
    possibleChoices = new ArrayList<Pairing>();
    // 1) try all possible pairings
    for (int i = 0; i < keyList.size(); i++) {
        for (int j = 0; j < responseList.size(); j++) {
            Annotation keyAnn = keyList.get(i);
            Annotation resAnn = responseList.get(j);
            // stays null when the two annotations do not overlap at all
            PairingImpl choice = null;
            if (keyAnn.coextensive(resAnn)) {
                // we have full overlap -> CORRECT or MISMATCH
                if (keyAnn.isCompatible(resAnn, significantFeaturesSet)) {
                    // we have a full match
                    choice = new PairingImpl(i, j, CORRECT_VALUE);
                } else {
                    // the two annotations are coextensive but don't match
                    // we have a mismatch
                    choice = new PairingImpl(i, j, MISMATCH_VALUE);
                }
            } else if (keyAnn.overlaps(resAnn)) {
                // we have partial overlap -> PARTIALLY_CORRECT or WRONG
                if (keyAnn.isPartiallyCompatible(resAnn, significantFeaturesSet)) {
                    choice = new PairingImpl(i, j, PARTIALLY_CORRECT_VALUE);
                } else {
                    choice = new PairingImpl(i, j, WRONG_VALUE);
                }
            }
            // add the new choice if any
            if (choice != null) {
                // register the candidate with its key, its response and the
                // global candidate list
                addPairing(choice, i, keyChoices);
                addPairing(choice, j, responseChoices);
                possibleChoices.add(choice);
            }
        }
    // for j
    }
    // for i
    // 2) from all possible pairings, find the maximal set that also
    // maximises the total score
    // sort with the comparator, then reverse, so the list is consumed in the
    // reverse of PairingScoreComparator order (presumably best score first)
    Collections.sort(possibleChoices, new PairingScoreComparator());
    Collections.reverse(possibleChoices);
    finalChoices = new ArrayList<Pairing>();
    correctMatches = 0;
    partiallyCorrectMatches = 0;
    missing = 0;
    spurious = 0;
    while (!possibleChoices.isEmpty()) {
        PairingImpl bestChoice = (PairingImpl) possibleChoices.remove(0);
        // NOTE(review): consume() is defined elsewhere; presumably it drops
        // the remaining candidates that conflict with this choice - confirm
        bestChoice.consume();
        finalChoices.add(bestChoice);
        // record the choice in the result set and counter matching its value
        switch(bestChoice.value) {
            case CORRECT_VALUE:
                {
                    correctAnnotations.add(bestChoice.getResponse());
                    correctMatches++;
                    bestChoice.setType(CORRECT_TYPE);
                    break;
                }
            case PARTIALLY_CORRECT_VALUE:
                {
                    partiallyCorrectAnnotations.add(bestChoice.getResponse());
                    partiallyCorrectMatches++;
                    bestChoice.setType(PARTIALLY_CORRECT_TYPE);
                    break;
                }
            case MISMATCH_VALUE:
                {
                    // this is a missing and a spurious annotations together
                    missingAnnotations.add(bestChoice.getKey());
                    missing++;
                    spuriousAnnotations.add(bestChoice.getResponse());
                    spurious++;
                    bestChoice.setType(MISMATCH_TYPE);
                    break;
                }
            case WRONG_VALUE:
                {
                    // NOTE(review): when both key and response are non-null,
                    // both branches below run and SPURIOUS_TYPE is the last
                    // type set on the pairing
                    if (bestChoice.getKey() != null) {
                        // we have a missed key
                        // defensive check: the set is initialised at the top
                        // of this method
                        if (missingAnnotations == null)
                            missingAnnotations = new HashSet<Annotation>();
                        missingAnnotations.add(bestChoice.getKey());
                        missing++;
                        bestChoice.setType(MISSING_TYPE);
                    }
                    if (bestChoice.getResponse() != null) {
                        // we have a spurious response
                        // defensive check: the set is initialised at the top
                        // of this method
                        if (spuriousAnnotations == null)
                            spuriousAnnotations = new HashSet<Annotation>();
                        spuriousAnnotations.add(bestChoice.getResponse());
                        spurious++;
                        bestChoice.setType(SPURIOUS_TYPE);
                    }
                    break;
                }
            default:
                {
                    throw new GateRuntimeException("Invalid pairing type: " + bestChoice.value);
                }
        }
    }
    // get the unmatched keys
    // a key whose choice list is null or empty is reported as missing, using
    // -1 as the response index of the added pairing
    for (int i = 0; i < keyChoices.size(); i++) {
        List<Pairing> aList = keyChoices.get(i);
        if (aList == null || aList.isEmpty()) {
            if (missingAnnotations == null)
                missingAnnotations = new HashSet<Annotation>();
            missingAnnotations.add((keyList.get(i)));
            Pairing choice = new PairingImpl(i, -1, WRONG_VALUE);
            choice.setType(MISSING_TYPE);
            finalChoices.add(choice);
            missing++;
        }
    }
    // get the unmatched responses
    // a response whose choice list is null or empty is reported as spurious,
    // using -1 as the key index of the added pairing
    for (int i = 0; i < responseChoices.size(); i++) {
        List<Pairing> aList = responseChoices.get(i);
        if (aList == null || aList.isEmpty()) {
            if (spuriousAnnotations == null)
                spuriousAnnotations = new HashSet<Annotation>();
            spuriousAnnotations.add((responseList.get(i)));
            PairingImpl choice = new PairingImpl(-1, i, WRONG_VALUE);
            choice.setType(SPURIOUS_TYPE);
            finalChoices.add(choice);
            spurious++;
        }
    }
    return finalChoices;
}
Also used : Annotation(gate.Annotation)

Example 65 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class DocumentStaxUtils method writeAnnotationSet.

/**
 * Serialises a collection of annotations as a GATE XML
 * <code>AnnotationSet</code> element on the given XMLStreamWriter. Each
 * annotation becomes an <code>Annotation</code> child element carrying its
 * Id, Type, StartNode and EndNode attributes followed by its features.
 *
 * @param annotations the annotations to write; <code>null</code> produces
 *          an AnnotationSet element without Annotation children
 * @param asName the value for the Name attribute of the AnnotationSet
 *          element. <code>null</code> means that no Name attribute is
 *          written.
 * @param xsw the writer to use for output
 * @param namespaceURI the namespace URI passed to the writer for the
 *          AnnotationSet and Annotation elements
 * @throws XMLStreamException if the underlying writer reports an error
 */
public static void writeAnnotationSet(Collection<Annotation> annotations, String asName, XMLStreamWriter xsw, String namespaceURI) throws XMLStreamException {
    // opening AnnotationSet element, optionally named
    xsw.writeStartElement(namespaceURI, "AnnotationSet");
    if (asName != null) {
        xsw.writeAttribute("Name", asName);
    }
    newLine(xsw);
    if (annotations != null) {
        for (Annotation annot : annotations) {
            xsw.writeStartElement(namespaceURI, "Annotation");
            String id = String.valueOf(annot.getId());
            xsw.writeAttribute("Id", id);
            xsw.writeAttribute("Type", annot.getType());
            // node references are written as the offsets of the nodes
            String startOffset = String.valueOf(annot.getStartNode().getOffset());
            xsw.writeAttribute("StartNode", startOffset);
            String endOffset = String.valueOf(annot.getEndNode().getOffset());
            xsw.writeAttribute("EndNode", endOffset);
            newLine(xsw);
            writeFeatures(annot.getFeatures(), xsw, namespaceURI);
            // end Annotation element
            xsw.writeEndElement();
            newLine(xsw);
        }
    }
    // end AnnotationSet element
    xsw.writeEndElement();
    newLine(xsw);
}
Also used : Annotation(gate.Annotation)

Aggregations

Annotation (gate.Annotation)69 AnnotationSet (gate.AnnotationSet)28 ArrayList (java.util.ArrayList)24 HashMap (java.util.HashMap)15 Node (gate.Node)10 HashSet (java.util.HashSet)10 List (java.util.List)10 FeatureMap (gate.FeatureMap)8 Map (java.util.Map)8 TreeSet (java.util.TreeSet)8 Document (gate.Document)7 InvalidOffsetException (gate.util.InvalidOffsetException)7 Point (java.awt.Point)6 LinkedList (java.util.LinkedList)5 Set (java.util.Set)5 StatusListener (gate.event.StatusListener)4 GateRuntimeException (gate.util.GateRuntimeException)3 Color (java.awt.Color)3 Stack (java.util.Stack)3 TreeMap (java.util.TreeMap)3