use of gate.AnnotationSet in project gate-core by GateNLP.
the class AnnotationMerging method mergeAnnotation.
/**
 * Merges the annotations of several annotators into {@code mergeAnns}.
 *
 * <p>Each annotation of annotator {@code iA} is compared with the annotations
 * of every later annotator using {@code Annotation.isCompatible(ann, featSet)},
 * where {@code featSet} contains {@code nameFeat} when it is non-null. If the
 * annotation is found in at least {@code numMinK} annotation sets (its own set
 * included), it is put into {@code mergeAnns}; the mapped value is the
 * dash-separated list of indices of the annotators that contain it
 * (for example {@code "0-2-3"}).
 *
 * <p>The input sets are not modified: matching is done on temporary copies,
 * and an annotation that has been matched is removed from its copy so that it
 * is not considered again.
 *
 * @param annsArr one annotation set per annotator; {@code null} entries are skipped
 * @param nameFeat name of the feature passed to the compatibility check, or
 *        {@code null} to pass an empty feature-name set
 * @param mergeAnns output map, filled with the merged annotations
 * @param numMinK minimal number of annotation sets that must contain an
 *        annotation; values below 1 are treated as 1
 * @param isTheSameInstances if {@code true} and {@code nameFeat} is non-null,
 *        an annotation that does not reach {@code numMinK} is still put into
 *        {@code mergeAnns}, but with the feature {@code nameFeat} removed from
 *        the annotation itself
 */
public static void mergeAnnotation(AnnotationSet[] annsArr, String nameFeat, HashMap<Annotation, String> mergeAnns, int numMinK, boolean isTheSameInstances) {
  int numA = annsArr.length;
  // First copy the annotation sets into a temp array, so that matched
  // annotations can be removed without touching the caller's sets.
  @SuppressWarnings({ "unchecked", "rawtypes" })
  Set<Annotation>[] annsArrTemp = new Set[numA];
  for (int i = 0; i < numA; ++i) {
    if (annsArr[i] != null) {
      annsArrTemp[i] = new HashSet<Annotation>();
      for (Annotation ann : annsArr[i]) {
        annsArrTemp[i].add(ann);
      }
    }
  }
  Set<String> featSet = new HashSet<String>();
  if (nameFeat != null) {
    featSet.add(nameFeat);
  }
  if (numMinK < 1) {
    numMinK = 1;
  }
  // An annotation first seen in set iA can be contained in at most
  // (numA - iA) sets, so later starting points cannot reach numMinK.
  for (int iA = 0; iA < numA - numMinK + 1; ++iA) {
    if (annsArrTemp[iA] == null) {
      continue;
    }
    for (Annotation ann : annsArrTemp[iA]) {
      int numContained = 1;
      // Indices of the annotators that contain this annotation.
      StringBuilder featAdd = new StringBuilder();
      featAdd.append(iA);
      for (int i = iA + 1; i < numA; ++i) {
        if (annsArrTemp[i] == null) {
          continue;
        }
        // Look for the first compatible annotation of annotator i.
        Annotation annT = null;
        for (Annotation ann0 : annsArrTemp[i]) {
          if (ann0.isCompatible(ann, featSet)) {
            annT = ann0;
            break;
          }
        }
        if (annT != null) {
          ++numContained;
          featAdd.append('-').append(i);
          // Consume the match so it is not processed again later.
          annsArrTemp[i].remove(annT);
        }
      }
      if (numContained >= numMinK) {
        mergeAnns.put(ann, featAdd.toString());
      } else if (isTheSameInstances && nameFeat != null) {
        // Keep the instance but drop the label that was not agreed on.
        ann.getFeatures().remove(nameFeat);
        mergeAnns.put(ann, featAdd.toString());
      }
    }
  }
  // Remove the annotation in the same place
  // (removeDuplicate is defined elsewhere in this class).
  removeDuplicate(mergeAnns);
  return;
}
use of gate.AnnotationSet in project gate-core by GateNLP.
the class AnnotationMerging method mergeAnnotationMajority.
/**
 * Merges the annotations of several annotators into {@code mergeAnns},
 * choosing for each group of coextensive annotations the value of the feature
 * {@code nameFeat} that most annotators agree on.
 *
 * <p>If {@code nameFeat} is {@code null} the work is delegated to
 * {@code mergeAnnogationMajorityNoFeat}. Otherwise each annotation of
 * annotator {@code iA} is grouped with the first coextensive annotation of
 * every later annotator; matched annotations are removed from temporary
 * copies of the sets, so the input sets themselves are not modified. Within a
 * group the annotators are bucketed by the string value of {@code nameFeat}
 * (a missing feature is the {@code null} bucket). The largest bucket wins; if
 * its size is at least the number of annotators that have no coextensive
 * annotation, the first annotation seen with that value is put into
 * {@code mergeAnns}, mapped to the dash-separated indices of the annotators
 * in that bucket.
 *
 * @param annsArr one annotation set per annotator; {@code null} entries count
 *        as annotators without a matching annotation
 * @param nameFeat name of the feature whose value is voted on
 * @param mergeAnns output map, filled with the merged annotations
 * @param isTheSameInstances if {@code true}, an annotation whose largest
 *        bucket is too small is still put into {@code mergeAnns}, with the
 *        feature {@code nameFeat} removed from the annotation itself and
 *        mapped to the dash-separated indices of the annotators without a
 *        matching annotation
 */
public static void mergeAnnotationMajority(AnnotationSet[] annsArr, String nameFeat, HashMap<Annotation, String> mergeAnns, boolean isTheSameInstances) {
  int numA = annsArr.length;
  if (nameFeat == null) {
    mergeAnnogationMajorityNoFeat(annsArr, mergeAnns, isTheSameInstances);
    return;
  }
  // First copy the annotation sets into a temp array, so that matched
  // annotations can be removed without touching the caller's sets.
  @SuppressWarnings({ "unchecked", "rawtypes" })
  Set<Annotation>[] annsArrTemp = new Set[numA];
  for (int i = 0; i < numA; ++i) {
    if (annsArr[i] != null) {
      annsArrTemp[i] = new HashSet<Annotation>();
      for (Annotation ann : annsArr[i]) {
        annsArrTemp[i].add(ann);
      }
    }
  }
  for (int iA = 0; iA < numA; ++iA) {
    if (annsArrTemp[iA] == null) {
      continue;
    }
    for (Annotation ann : annsArrTemp[iA]) {
      // The iA earlier annotators already do not agree with this annotation:
      // any coextensive annotation of theirs would have consumed it.
      int numDisagreed = iA;
      StringBuilder featDisa = new StringBuilder();
      if (iA > 0) {
        featDisa.append("0");
        for (int i = 1; i < iA; ++i) {
          featDisa.append('-').append(i);
        }
      }
      // Feature value -> dash-separated indices of the annotators using it.
      HashMap<String, String> featOthers = new HashMap<String, String>();
      Object ownValue = ann.getFeatures().get(nameFeat);
      String featTh = (ownValue == null) ? null : ownValue.toString();
      featOthers.put(featTh, Integer.toString(iA));
      // Feature value -> first annotation seen carrying that value.
      HashMap<String, Annotation> annAll = new HashMap<String, Annotation>();
      annAll.put(featTh, ann);
      for (int i = iA + 1; i < numA; ++i) {
        boolean isContained = false;
        if (annsArrTemp[i] != null) {
          Annotation annT = null;
          for (Annotation ann0 : annsArrTemp[i]) {
            if (ann0.coextensive(ann)) {
              Object otherValue = ann0.getFeatures().get(nameFeat);
              String featValue = (otherValue == null) ? null : otherValue.toString();
              if (!featOthers.containsKey(featValue)) {
                featOthers.put(featValue, Integer.toString(i));
                annAll.put(featValue, ann0);
              } else {
                String str = featOthers.get(featValue);
                featOthers.put(featValue, str + "-" + i);
              }
              annT = ann0;
              isContained = true;
              break;
            }
          }
          if (isContained) {
            // Consume the match so it is not processed again later.
            annsArrTemp[i].remove(annT);
          }
        }
        if (!isContained) {
          if (numDisagreed == 0) {
            featDisa.append(i);
          } else {
            featDisa.append('-').append(i);
          }
          ++numDisagreed;
        }
      }
      // end of the loop for the following annotation set
      // Find the feature value used by the most annotators. Ties are
      // resolved in favour of the value met first while iterating the map.
      int numAgreed = -1;
      String agreeFeat = null;
      for (String str : featOthers.keySet()) {
        String annotators = featOthers.get(str);
        // Number of annotators = number of '-' separators + 1.
        int num = 1;
        for (int pos = annotators.indexOf('-'); pos >= 0; pos = annotators.indexOf('-', pos + 1)) {
          ++num;
        }
        if (numAgreed < num) {
          numAgreed = num;
          agreeFeat = str;
        }
      }
      if (numAgreed >= numDisagreed) {
        mergeAnns.put(annAll.get(agreeFeat), featOthers.get(agreeFeat));
      } else if (isTheSameInstances) {
        // Keep the instance but drop the label that was not agreed on.
        if (ann.getFeatures().get(nameFeat) != null) {
          ann.getFeatures().remove(nameFeat);
        }
        mergeAnns.put(ann, featDisa.toString());
      }
    }
    // for each ann in the current annotation set
  }
  return;
}
use of gate.AnnotationSet in project gate-core by GateNLP.
the class CorpusBenchmarkTool method measureDocs.
// calculateAvgTotal
/**
 * Compares the annotations of type {@code annotType} in a key document and a
 * response document.
 *
 * <p>Key annotations are read from the set named by the {@code annotSetName}
 * field, response annotations from the set named by the {@code outputSetName}
 * field; when {@code annotSetName} is {@code null} or empty the default
 * annotation set of both documents is used instead.
 *
 * @param keyDoc document holding the key annotations
 * @param respDoc document holding the response annotations
 * @param annotType annotation type to compare
 * @return the differ after {@code calculateDiff} has been run, or
 *         {@code null} if either document is {@code null} or the lookup of
 *         {@code annotType} in the key document yields {@code null}
 */
protected AnnotationDiffer measureDocs(Document keyDoc, Document respDoc, String annotType) throws ResourceInstantiationException {
  if (keyDoc == null || respDoc == null) {
    return null;
  }

  final boolean useDefaultSet = annotSetName == null || annotSetName.equals("");

  // Nothing to measure when the key document has no such annotations.
  if (annotSetName != null && keyDoc.getAnnotations(annotSetName).get(annotType) == null) {
    return null;
  }
  if (useDefaultSet && keyDoc.getAnnotations().get(annotType) == null) {
    return null;
  }

  // Create the differ and tell it which features are significant.
  AnnotationDiffer differ = new AnnotationDiffer();
  differ.setSignificantFeaturesSet(diffFeaturesSet);

  // Pick the key and response annotations from the configured sets.
  final AnnotationSet keyAnnots;
  final AnnotationSet respAnnots;
  if (useDefaultSet) {
    keyAnnots = keyDoc.getAnnotations().get(annotType);
    respAnnots = respDoc.getAnnotations().get(annotType);
  } else {
    keyAnnots = keyDoc.getAnnotations(annotSetName).get(annotType);
    respAnnots = respDoc.getAnnotations(outputSetName).get(annotType);
  }

  differ.calculateDiff(keyAnnots, respAnnots);
  return differ;
}
use of gate.AnnotationSet in project gate-core by GateNLP.
the class TestAnnotationMerging method testWithfeat.
/**
 * Runs one annotation-merging scenario and checks the number of merged
 * annotations via {@code checkNumbers}.
 *
 * <p>Annotations of type {@code nameAnnType} are collected for every document
 * of the corpus and every annotation set named in {@code nameAnnSets}; the
 * merge itself is run on the sets of the first document only.
 *
 * @param nameAnnSets names of the annotators' annotation sets, separated by ';'
 * @param nameAnnType annotation type to merge
 * @param nameAnnFeat annotation feature handed to the merging method
 * @param data corpus providing the documents
 * @param isUsingMajority {@code true} to use {@code mergeAnnotationMajority},
 *        {@code false} to use {@code mergeAnnotation} with a minimum of 2
 */
public void testWithfeat(String nameAnnSets, String nameAnnType, String nameAnnFeat, Corpus data, boolean isUsingMajority) {
  // One annotation set name per judge.
  String[] judgeSetNames = nameAnnSets.split(";");
  int judgeCount = judgeSetNames.length;
  int docCount = data.size();

  // Collect the annotations of the requested type: [document][judge].
  AnnotationSet[][] annsByDoc = new AnnotationSet[docCount][judgeCount];
  for (int d = 0; d < docCount; ++d) {
    Document doc = data.get(d);
    for (int j = 0; j < judgeCount; ++j) {
      annsByDoc[d][j] = doc.getAnnotations(judgeSetNames[j]).get(nameAnnType);
    }
  }

  // The flag stays true only if the check holds for every document.
  boolean isTheSameInstances = true;
  for (AnnotationSet[] judgeSets : annsByDoc) {
    if (!AnnotationMerging.isSameInstancesForAnnotators(judgeSets, 1)) {
      isTheSameInstances = false;
      break;
    }
  }

  // Merge the annotation sets of the first document.
  HashMap<Annotation, String> mergeInfor = new HashMap<Annotation, String>();
  if (isUsingMajority) {
    AnnotationMerging.mergeAnnotationMajority(annsByDoc[0], nameAnnFeat, mergeInfor, isTheSameInstances);
  } else {
    AnnotationMerging.mergeAnnotation(annsByDoc[0], nameAnnFeat, mergeInfor, 2, isTheSameInstances);
  }

  // With identical instances only annotations that still carry the feature
  // are counted; otherwise every merged annotation is counted.
  int numAnns;
  if (!isTheSameInstances) {
    numAnns = mergeInfor.size();
  } else {
    numAnns = 0;
    for (Annotation merged : mergeInfor.keySet()) {
      if (merged.getFeatures().get(nameAnnFeat) != null) {
        ++numAnns;
      }
    }
  }
  checkNumbers(numAnns);
}
use of gate.AnnotationSet in project gate-core by GateNLP.
the class TestDiffer method testDiffer.
// tearDown
/**
 * Checks the strict and lenient precision/recall figures reported by
 * {@link AnnotationDiffer} in three situations: identical key and response
 * lists, one extra response annotation (lower precision), and one extra key
 * annotation (lower recall).
 *
 * @throws Exception if the test document cannot be created
 */
public void testDiffer() throws Exception {
  Document doc = Factory.newDocument(new URL(gate.corpora.TestDocument.getTestServerName() + "tests/ft-bt-03-aug-2001.html"), "windows-1252");
  AnnotationSet annSet = doc.getAnnotations();
  // create 100 annotations
  FeatureMap features = Factory.newFeatureMap();
  features.put("type", "BAR");
  for (int i = 0; i < 100; i++) {
    annSet.add(Long.valueOf(i * 10), Long.valueOf((i + 1) * 10), "Foo", features);
  }
  List<Annotation> keySet = new ArrayList<Annotation>(annSet);
  List<Annotation> responseSet = new ArrayList<Annotation>(annSet);

  // check 100% Precision and recall
  AnnotationDiffer differ = new AnnotationDiffer();
  differ.setSignificantFeaturesSet(null);
  differ.calculateDiff(keySet, responseSet);
  differ.sanityCheck();
  if (DEBUG) {
    differ.printMissmatches();
  }
  double value = differ.getPrecisionStrict();
  Assert.assertEquals("Precision Strict: " + value + " instead of 1!", 1, value, 0);
  value = differ.getRecallStrict();
  Assert.assertEquals("Recall Strict: " + value + " instead of 1!", 1, value, 0);
  value = differ.getPrecisionLenient();
  Assert.assertEquals("Precision Lenient: " + value + " instead of 1!", 1, value, 0);
  value = differ.getRecallLenient();
  Assert.assertEquals("Recall Lenient: " + value + " instead of 1!", 1, value, 0);

  // check low precision: one extra annotation that only the response has
  Integer id = annSet.add(Long.valueOf(2L), Long.valueOf(4L), "Foo", features);
  Annotation falsePositive = annSet.get(id);
  responseSet.add(falsePositive);
  differ.calculateDiff(keySet, responseSet);
  differ.sanityCheck();
  if (DEBUG) {
    differ.printMissmatches();
  }
  value = differ.getPrecisionStrict();
  Assert.assertEquals("Precision Strict: " + value + " instead of .99!", .99, value, .001);
  // recall should still be 100%
  value = differ.getRecallStrict();
  Assert.assertEquals("Recall Strict: " + value + " instead of 1!", 1, value, 0);
  value = differ.getRecallLenient();
  Assert.assertEquals("Recall Lenient: " + value + " instead of 1!", 1, value, 0);

  // check low recall: move the extra annotation from the response to the key
  responseSet.remove(falsePositive);
  keySet.add(falsePositive);
  differ.calculateDiff(keySet, responseSet);
  differ.sanityCheck();
  if (DEBUG) {
    differ.printMissmatches();
  }
  value = differ.getRecallStrict();
  Assert.assertEquals("Recall Strict: " + value + " instead of .99!", .99, value, .001);
  // precision should still be 100%
  value = differ.getPrecisionStrict();
  Assert.assertEquals("Precision Strict: " + value + " instead of 1!", 1, value, 0);
  value = differ.getPrecisionLenient();
  Assert.assertEquals("Precision Lenient: " + value + " instead of 1!", 1, value, 0);
}
Aggregations