use of gate.Annotation in project gate-core by GateNLP.
the class AnnotationMerging method mergeAnnogationMajorityNoFeat.
/**
 * Majority-vote merging for annotations when no annotation feature is used as
 * the label: two annotations agree as soon as they are coextensive.
 * <p>
 * The annotation sets are processed in array order. For every annotation
 * still present in the working copy of set <code>iA</code>, the later sets are
 * searched for a coextensive annotation. Each later set that has one counts
 * as an agreement and the matching annotation is removed from that set's
 * working copy, so it is not considered again. Each later set without one,
 * and each of the <code>iA</code> earlier sets, counts as a disagreement.
 * <p>
 * The annotation is put into <code>mergeAnns</code> when agreements are at
 * least as numerous as disagreements, or unconditionally when
 * <code>isTheSameInstances</code> is true. The mapped value is the
 * dash-separated list of indexes of the agreeing sets, e.g. <code>"0-2"</code>.
 *
 * @param annsArr the annotation sets, one per annotator; entries may be
 *          <code>null</code> and are then counted as disagreeing
 * @param mergeAnns output map receiving the merged annotations
 * @param isTheSameInstances if true, every remaining annotation is kept
 *          regardless of the vote
 */
private static void mergeAnnogationMajorityNoFeat(AnnotationSet[] annsArr, HashMap<Annotation, String> mergeAnns, boolean isTheSameInstances) {
  int numA = annsArr.length;
  // Work on copies so matched annotations can be removed without touching
  // the caller's annotation sets.
  @SuppressWarnings({ "unchecked", "rawtypes" })
  Set<Annotation>[] annsArrTemp = new Set[numA];
  for (int i = 0; i < numA; ++i) {
    if (annsArr[i] != null) {
      annsArrTemp[i] = new HashSet<Annotation>();
      for (Annotation ann : annsArr[i]) {
        annsArrTemp[i].add(ann);
      }
    }
  }
  for (int iA = 0; iA < numA; ++iA) {
    if (annsArrTemp[iA] == null) {
      continue;
    }
    for (Annotation ann : annsArrTemp[iA]) {
      // Every earlier set is counted as disagreeing: an annotation that an
      // earlier set matched has already been removed from this working copy.
      int numDisagreed = iA;
      int numAgreed = 1;
      StringBuilder featAdd = new StringBuilder();
      featAdd.append(iA);
      for (int i = iA + 1; i < numA; ++i) {
        // Look for the first coextensive annotation in the later set.
        Annotation match = null;
        if (annsArrTemp[i] != null) {
          for (Annotation ann0 : annsArrTemp[i]) {
            if (ann0.coextensive(ann)) {
              match = ann0;
              break;
            }
          }
        }
        if (match != null) {
          ++numAgreed;
          featAdd.append('-').append(i);
          // Consume the match so it is not merged a second time.
          annsArrTemp[i].remove(match);
        } else {
          ++numDisagreed;
        }
      }
      // Ties count as a majority; isTheSameInstances overrides the vote.
      if (numAgreed >= numDisagreed || isTheSameInstances) {
        mergeAnns.put(ann, featAdd.toString());
      }
    }
  }
}
use of gate.Annotation in project gate-core by GateNLP.
the class ClassificationMeasures method calculateConfusionMatrix.
/**
 * Builds the confusion matrix for two annotation sets by comparing, for
 * annotations with identical spans, the value of the given feature.
 * The list of classes (feature values) is collected while scanning and stored
 * in <code>featureValues</code>; the resulting matrix is stored in
 * <code>confusionMatrix</code>.
 * <p>
 * An annotation from the first set is ignored when another annotation in the
 * first set has the same span, when it has no same-span counterpart in the
 * second set, or when it has more than one such counterpart.
 *
 * @param aS1 annotation set to compare to the second
 * @param aS2 annotation set to compare to the first
 * @param type annotation type containing the features to compare
 * @param feature feature name whose values will be compared
 * @param verbose if true, a message is printed for every ignored annotation
 */
public void calculateConfusionMatrix(AnnotationSet aS1, AnnotationSet aS2, String type, String feature, boolean verbose) {
  // Class labels seen so far, kept sorted.
  featureValues = new TreeSet<String>();
  // counts.get(label1).get(label2) = number of spans labelled label1 in
  // set 1 and label2 in set 2.
  HashMap<String, HashMap<String, Float>> counts = new HashMap<String, HashMap<String, Float>>();

  // Restrict both sets to annotations of the requested type that carry the
  // requested feature.
  HashSet<String> requiredFeatures = new HashSet<String>();
  requiredFeatures.add(feature);
  AnnotationSet candidates1 = aS1.get(type, requiredFeatures);
  AnnotationSet candidates2 = aS2.get(type, requiredFeatures);

  for (Annotation current : candidates1) {
    // Collect every other annotation in set 1 that shares this span; such
    // duplicates make the comparison ambiguous and are skipped.
    List<Annotation> sameSpanInSet1 = new ArrayList<Annotation>();
    for (Annotation other : candidates1) {
      if (!other.equals(current) && other.coextensive(current)) {
        sameSpanInSet1.add(other);
        sameSpanInSet1.add(current);
      }
    }
    if (sameSpanInSet1.size() > 1) {
      if (verbose) {
        Out.prln("ClassificationMeasures: " + "Same span annotations in set 1 detected! Ignoring.");
        Out.prln(Arrays.toString(sameSpanInSet1.toArray()));
      }
      continue;
    }

    // Gather the counterparts with the same span in set 2.
    List<Annotation> counterparts = new ArrayList<Annotation>();
    for (Annotation other : candidates2) {
      if (other.coextensive(current)) {
        counterparts.add(other);
      }
    }
    if (counterparts.size() == 0) {
      if (verbose) {
        Out.prln("ClassificationMeasures: Annotation in set 1 " + "with no counterpart in set 2 detected! Ignoring.");
        Out.prln(current.toString());
      }
      continue;
    }
    if (counterparts.size() > 1) {
      if (verbose) {
        Out.prln("ClassificationMeasures: " + "Same span annotations in set 2 detected! Ignoring.");
        Out.prln(Arrays.toString(counterparts.toArray()));
      }
      continue;
    }

    // Exactly one counterpart: record the pair of labels.
    String label1 = String.valueOf(current.getFeatures().get(feature));
    String label2 = String.valueOf(counterparts.get(0).getFeatures().get(feature));
    featureValues.add(label1);
    featureValues.add(label2);

    // Fetch the row for the set-1 label, creating it on first use.
    HashMap<String, Float> row = counts.get(label1);
    if (row == null) {
      row = new HashMap<String, Float>();
      counts.put(label1, row);
    }
    // Start the cell at one or bump the existing count.
    Float previous = row.get(label2);
    if (previous == null) {
      row.put(label2, 1f);
    } else {
      row.put(label2, previous.intValue() + 1f);
    }
  }

  // The measure calculations work on a float matrix, so translate the
  // nested maps into one.
  confusionMatrix = convert2DHashTo2DFloatArray(counts, featureValues);
}
use of gate.Annotation in project gate-core by GateNLP.
the class CorpusBenchmarkTool method countWords.
/**
 * Counts the Token annotations in the document's <code>outputSetName</code>
 * annotation set whose "kind" feature equals "word", ignoring case.
 * Tokens whose "kind" feature is missing or is not a String are not counted.
 *
 * @param annotDoc the document to inspect; may be <code>null</code>
 * @return the number of word tokens, or 0 when the document is
 *         <code>null</code> or the Token annotation set is <code>null</code>
 */
protected int countWords(Document annotDoc) {
  if (annotDoc == null) {
    return 0;
  }
  // check for Token in outputSetName
  AnnotationSet tokens = annotDoc.getAnnotations(outputSetName).get("Token");
  if (tokens == null) {
    return 0;
  }
  int count = 0;
  for (Annotation token : tokens) {
    Object kind = token.getFeatures().get("kind");
    // instanceof rejects null and non-String values, so the cast below can
    // no longer throw a ClassCastException.
    if (kind instanceof String && "word".equalsIgnoreCase((String) kind)) {
      ++count;
    }
  }
  return count;
}
use of gate.Annotation in project gate-core by GateNLP.
the class AnnotationDiffer method calculateDiff.
/**
 * Computes a diff between two collections of annotations.
 * <p>
 * Every key annotation is compared with every response annotation. Pairs
 * that are coextensive or overlapping become candidate pairings, which are
 * then processed one at a time from the head of the sorted candidate list.
 * Keys and responses that end up with no pairing are reported as missing
 * and spurious respectively.
 * <p>
 * As a side effect this method replaces or resets the instance state it
 * works on: keyList, responseList, the correct / partiallyCorrect / missing /
 * spurious annotation sets, keyChoices, responseChoices, possibleChoices,
 * finalChoices and the four match counters.
 *
 * @param key the collection of key annotations; <code>null</code> or empty
 *          is treated as no keys.
 * @param response the collection of response annotations; <code>null</code>
 *          or empty is treated as no responses.
 * @return a list of {@link Pairing} objects representing the pairing set
 * that results in the best score. Unmatched keys appear as pairings with
 * response index -1 and unmatched responses as pairings with key index -1.
 */
public List<Pairing> calculateDiff(Collection<Annotation> key, Collection<Annotation> response) {
// initialise data structures
// Copy the inputs into lists so they can be addressed by index.
if (key == null || key.size() == 0)
keyList = new ArrayList<Annotation>();
else
keyList = new ArrayList<Annotation>(key);
if (response == null || response.size() == 0)
responseList = new ArrayList<Annotation>();
else
responseList = new ArrayList<Annotation>(response);
// Reuse the result sets from a previous run if they exist, otherwise
// create them.
if (correctAnnotations != null) {
correctAnnotations.clear();
} else {
correctAnnotations = new HashSet<Annotation>();
}
if (partiallyCorrectAnnotations != null) {
partiallyCorrectAnnotations.clear();
} else {
partiallyCorrectAnnotations = new HashSet<Annotation>();
}
if (missingAnnotations != null) {
missingAnnotations.clear();
} else {
missingAnnotations = new HashSet<Annotation>();
}
if (spuriousAnnotations != null) {
spuriousAnnotations.clear();
} else {
spuriousAnnotations = new HashSet<Annotation>();
}
// One slot per key / response, pre-filled with null so that slot i can be
// addressed directly by the annotation's index.
keyChoices = new ArrayList<List<Pairing>>(keyList.size());
keyChoices.addAll(Collections.nCopies(keyList.size(), (List<Pairing>) null));
responseChoices = new ArrayList<List<Pairing>>(responseList.size());
responseChoices.addAll(Collections.nCopies(responseList.size(), (List<Pairing>) null));
possibleChoices = new ArrayList<Pairing>();
// 1) try all possible pairings
for (int i = 0; i < keyList.size(); i++) {
for (int j = 0; j < responseList.size(); j++) {
Annotation keyAnn = keyList.get(i);
Annotation resAnn = responseList.get(j);
// Stays null when the two annotations neither coincide nor overlap.
PairingImpl choice = null;
if (keyAnn.coextensive(resAnn)) {
// we have full overlap -> CORRECT or WRONG
if (keyAnn.isCompatible(resAnn, significantFeaturesSet)) {
// we have a full match
choice = new PairingImpl(i, j, CORRECT_VALUE);
} else {
// the two annotations are coextensive but don't match
// we have a missmatch
choice = new PairingImpl(i, j, MISMATCH_VALUE);
}
} else if (keyAnn.overlaps(resAnn)) {
// we have partial overlap -> PARTIALLY_CORRECT or WRONG
if (keyAnn.isPartiallyCompatible(resAnn, significantFeaturesSet)) {
choice = new PairingImpl(i, j, PARTIALLY_CORRECT_VALUE);
} else {
choice = new PairingImpl(i, j, WRONG_VALUE);
}
}
// add the new choice if any
if (choice != null) {
// Register the candidate under its key index, its response index and in
// the global candidate list.
addPairing(choice, i, keyChoices);
addPairing(choice, j, responseChoices);
possibleChoices.add(choice);
}
}
// for j
}
// for i
// 2) from all possible pairings, find the maximal set that also
// maximises the total score
// Sort with PairingScoreComparator, then reverse the result.
// NOTE(review): presumably this puts the highest-scoring pairing first;
// the comparator is not visible here - confirm.
Collections.sort(possibleChoices, new PairingScoreComparator());
Collections.reverse(possibleChoices);
finalChoices = new ArrayList<Pairing>();
correctMatches = 0;
partiallyCorrectMatches = 0;
missing = 0;
spurious = 0;
while (!possibleChoices.isEmpty()) {
// Take the candidate currently at the head of the list.
PairingImpl bestChoice = (PairingImpl) possibleChoices.remove(0);
// NOTE(review): consume() is defined elsewhere; presumably it discards
// the remaining candidates that conflict with this one - confirm.
bestChoice.consume();
finalChoices.add(bestChoice);
// Record the outcome in the matching result set and counter.
switch(bestChoice.value) {
case CORRECT_VALUE:
{
correctAnnotations.add(bestChoice.getResponse());
correctMatches++;
bestChoice.setType(CORRECT_TYPE);
break;
}
case PARTIALLY_CORRECT_VALUE:
{
partiallyCorrectAnnotations.add(bestChoice.getResponse());
partiallyCorrectMatches++;
bestChoice.setType(PARTIALLY_CORRECT_TYPE);
break;
}
case MISMATCH_VALUE:
{
// this is a missing and a spurious annotations together
missingAnnotations.add(bestChoice.getKey());
missing++;
spuriousAnnotations.add(bestChoice.getResponse());
spurious++;
bestChoice.setType(MISMATCH_TYPE);
break;
}
case WRONG_VALUE:
{
// NOTE(review): when both key and response are non-null, both branches
// below run and setType(SPURIOUS_TYPE) is the last call - confirm that
// this is intended.
if (bestChoice.getKey() != null) {
// we have a missed key
// NOTE(review): missingAnnotations was initialised at the top of this
// method, so this null check looks redundant unless consume() can reset it.
if (missingAnnotations == null)
missingAnnotations = new HashSet<Annotation>();
missingAnnotations.add(bestChoice.getKey());
missing++;
bestChoice.setType(MISSING_TYPE);
}
if (bestChoice.getResponse() != null) {
// we have a spurious response
if (spuriousAnnotations == null)
spuriousAnnotations = new HashSet<Annotation>();
spuriousAnnotations.add(bestChoice.getResponse());
spurious++;
bestChoice.setType(SPURIOUS_TYPE);
}
break;
}
default:
{
throw new GateRuntimeException("Invalid pairing type: " + bestChoice.value);
}
}
}
// get the unmatched keys
// A key whose choice list is null or empty is reported as missing, using a
// pairing with response index -1.
for (int i = 0; i < keyChoices.size(); i++) {
List<Pairing> aList = keyChoices.get(i);
if (aList == null || aList.isEmpty()) {
if (missingAnnotations == null)
missingAnnotations = new HashSet<Annotation>();
missingAnnotations.add((keyList.get(i)));
Pairing choice = new PairingImpl(i, -1, WRONG_VALUE);
choice.setType(MISSING_TYPE);
finalChoices.add(choice);
missing++;
}
}
// get the unmatched responses
// A response whose choice list is null or empty is reported as spurious,
// using a pairing with key index -1.
for (int i = 0; i < responseChoices.size(); i++) {
List<Pairing> aList = responseChoices.get(i);
if (aList == null || aList.isEmpty()) {
if (spuriousAnnotations == null)
spuriousAnnotations = new HashSet<Annotation>();
spuriousAnnotations.add((responseList.get(i)));
PairingImpl choice = new PairingImpl(-1, i, WRONG_VALUE);
choice.setType(SPURIOUS_TYPE);
finalChoices.add(choice);
spurious++;
}
}
return finalChoices;
}
use of gate.Annotation in project gate-core by GateNLP.
the class DocumentStaxUtils method writeAnnotationSet.
/**
 * Serialises a collection of annotations to an XMLStreamWriter as a GATE XML
 * <code>AnnotationSet</code> element. Each annotation becomes an
 * <code>Annotation</code> child element carrying its Id, Type, StartNode and
 * EndNode (node offsets) as attributes, followed by its features.
 *
 * @param annotations the annotations to write; if <code>null</code>, an
 *          empty AnnotationSet element is written
 * @param asName value for the element's Name attribute;
 *          <code>null</code> means that the attribute is omitted
 * @param xsw the writer to use for output
 * @param namespaceURI the namespace URI used for the written elements
 * @throws XMLStreamException if the underlying writer fails
 */
public static void writeAnnotationSet(Collection<Annotation> annotations, String asName, XMLStreamWriter xsw, String namespaceURI) throws XMLStreamException {
  // open the AnnotationSet element
  xsw.writeStartElement(namespaceURI, "AnnotationSet");
  if (asName != null) {
    xsw.writeAttribute("Name", asName);
  }
  newLine(xsw);

  if (annotations != null) {
    for (Annotation current : annotations) {
      // one Annotation element per annotation, attributes first
      xsw.writeStartElement(namespaceURI, "Annotation");
      xsw.writeAttribute("Id", String.valueOf(current.getId()));
      xsw.writeAttribute("Type", current.getType());
      xsw.writeAttribute("StartNode", String.valueOf(current.getStartNode().getOffset()));
      xsw.writeAttribute("EndNode", String.valueOf(current.getEndNode().getOffset()));
      newLine(xsw);
      // then the features as element content
      writeFeatures(current.getFeatures(), xsw, namespaceURI);
      xsw.writeEndElement();
      newLine(xsw);
    }
  }

  // close the AnnotationSet element
  xsw.writeEndElement();
  newLine(xsw);
}
Aggregations