use of gate.Annotation in project gate-core by GateNLP.
the class CorpusBenchmarkTool method printAnnotations.
protected void printAnnotations(Set<Annotation> set, Document doc) {
if (set == null || set.isEmpty())
Iterator<Annotation> iter = set.iterator();
while (iter.hasNext()) {
Annotation ann =;
Out.prln("<B>" + doc.getContent().toString().substring(ann.getStartNode().getOffset().intValue(), ann.getEndNode().getOffset().intValue()) + "</B>: <I>[" + ann.getStartNode().getOffset() + "," + ann.getEndNode().getOffset() + "]</I>");
// while
use of gate.Annotation in project gate-core by GateNLP.
the class CorpusBenchmarkTool method storeAnnotations.
// storeAnnotations
protected void storeAnnotations(String type, Set<Annotation> set, Document doc, Writer file) throws IOException {
if (set == null || set.isEmpty())
Iterator<Annotation> iter = set.iterator();
Annotation ann;
while (iter.hasNext()) {
ann =;
file.write(doc.getContent().toString().substring(ann.getStartNode().getOffset().intValue(), ann.getEndNode().getOffset().intValue()));
// while
use of gate.Annotation in project gate-core by GateNLP.
the class OntologyMeasures method calculateBdm.
/**
 * For a document get the annotation differs that contain the type to compare
 * and the annotation differs that may have miscategorized annotations
 * for this type. Then we try to find miscategorized types that are close
 * enough to the main type and use their BDM value to get an augmented
 * precision, recall and fscore.
 * @param differs annotation differ for the type and for possible
 * miscategorized types.
 */
// Accumulates, per annotation type, the BDM scores of key (missing) annotations
// that are coextensive with an unpaired response annotation, then groups the
// differs by annotation type and stores one combined differ per type.
// NOTE(review): this listing has lost its closing braces and several
// statements (flagged inline below), so it does not compile as shown.
public void calculateBdm(Collection<AnnotationDiffer> differs) {
// populate the score map only if it has not been loaded yet
if (bdmByConceptsMap == null) {
// load BDM file with scores for each concept/annotation type pair
// read the bdm scores
bdmByConceptsMap = read(bdmFileUrl);
// calculate BDM from the spurious and missing annotations
Set<Annotation> unpairedResponseAnnotations = new HashSet<Annotation>();
Set<Annotation> unpairedKeyAnnotations;
// will use the whole spurious annotations as the second set to compare
// NOTE(review): the body of this first loop is missing from the listing, so
// nothing visible ever adds to unpairedResponseAnnotations; going by the
// comment above it presumably collects each differ's spurious annotations
// -- confirm against the upstream source.
for (AnnotationDiffer differ : differs) {
for (AnnotationDiffer differ : differs) {
// the key side comes from this differ's MISSING_TYPE annotations
unpairedKeyAnnotations = differ.getAnnotationsOfType(AnnotationDiffer.MISSING_TYPE);
// make sure this differ's type has an entry, starting from a zero score
if (!bdmByTypeMap.containsKey(differ.getAnnotationType())) {
bdmByTypeMap.put(differ.getAnnotationType(), 0f);
// use the missing annotations as the first set to compare
for (Annotation unpairedKeyAnnotation : unpairedKeyAnnotations) {
String type = unpairedKeyAnnotation.getType();
// Out.prln("unpairedKeyAnnotation: " + unpairedKeyAnnotation.toString());
Iterator<Annotation> iterator = unpairedResponseAnnotations.iterator();
// use the spurious annotations as the second set to compare
while (iterator.hasNext()) {
// NOTE(review): the initializer is missing here; presumably iterator.next()
// -- confirm.
Annotation unpairedResponseAnnotation =;
// Out.prln("unpairedResponsAnnotation: "
// + unpairedResponseAnnotation.toString());
// score for this key/response pair; stays 0 when no BDM entry matches
float bdm = 0;
// annotations have the same start and end offsets
if (unpairedKeyAnnotation.coextensive(unpairedResponseAnnotation)) {
// compare both features values with BDM pairs
if (differ.getSignificantFeaturesSet() != null) {
// NOTE(review): the statement guarded by this check is missing; going by
// the comment below it presumably skips response annotations of a
// different type -- confirm.
if (!type.equals(unpairedResponseAnnotation.getType())) {
// types must be the same
for (Object feature : differ.getSignificantFeaturesSet()) {
// NOTE(review): the statement guarded by this null check is missing;
// presumably it skips the feature, since the lookups below dereference
// getFeatures() on both annotations -- confirm.
if (unpairedKeyAnnotation.getFeatures() == null || unpairedResponseAnnotation.getFeatures() == null) {
// Out.prln("Feature: " + feature);
String keyLabel = (String) unpairedKeyAnnotation.getFeatures().get(feature);
// Out.prln("KeyLabel: " + keyLabel);
String responseLabel = (String) unpairedResponseAnnotation.getFeatures().get(feature);
// Out.prln("ResponseLabel: " + responseLabel);
// look the label pair up in either order and add its score to the total
if (keyLabel == null || responseLabel == null) {
// do nothing
} else if (bdmByConceptsMap.containsKey(keyLabel + ", " + responseLabel)) {
bdm += bdmByConceptsMap.get(keyLabel + ", " + responseLabel);
} else if (bdmByConceptsMap.containsKey(responseLabel + ", " + keyLabel)) {
bdm += bdmByConceptsMap.get(responseLabel + ", " + keyLabel);
// average the accumulated score over the number of significant features
bdm = bdm / differ.getSignificantFeaturesSet().size();
} else {
// compare both types with BDM pairs
// NOTE(review): this first lookup joins the two types with ',' (no space)
// whereas the reversed lookup below and the feature lookups above use ", "
// -- looks inconsistent; confirm against the key format produced by read().
if (bdmByConceptsMap.containsKey(type + ',' + unpairedResponseAnnotation.getType())) {
bdm = bdmByConceptsMap.get(type + ',' + unpairedResponseAnnotation.getType());
} else if (bdmByConceptsMap.containsKey(unpairedResponseAnnotation.getType() + ", " + type)) {
bdm = bdmByConceptsMap.get(unpairedResponseAnnotation.getType() + ", " + type);
// only pairs with a positive score contribute to the per-type total
if (bdm > 0) {
bdmByTypeMap.put(type, bdmByTypeMap.get(type) + bdm);
// Out.prln("BDM: " + bdmByTypeMap.get(type));
// group the differs by the annotation type they report
Map<String, List<AnnotationDiffer>> differsByTypeMap = new HashMap<String, List<AnnotationDiffer>>();
for (AnnotationDiffer differ : differs) {
// we consider that all annotations in AnnotationDiffer are the same type
String type = differ.getAnnotationType();
List<AnnotationDiffer> differsType = differsByTypeMap.get(type);
// first differ seen for this type: register a fresh list for it
if (differsType == null) {
differsType = new ArrayList<AnnotationDiffer>();
differsByTypeMap.put(type, differsType);
// NOTE(review): no visible statement adds differ to differsType; presumably
// a differsType.add(differ) was lost from this listing -- confirm.
// combine the list of AnnotationDiffer for each type
for (Map.Entry<String, List<AnnotationDiffer>> entry : differsByTypeMap.entrySet()) {
differByTypeMap.put(entry.getKey(), new AnnotationDiffer(entry.getValue()));
use of gate.Annotation in project gate-core by GateNLP.
the class TestXml method testAnnotationConsistencyForSaveAsXml.
// testUnpackMarkup()
/**
 * This method runs ANNIE with defaults on a document, then saves
 * it as a GATE XML document and loads it back. All the annotations on the
 * loaded document should be the same as the original ones.
 * It also verifies that the matches feature still holds after an export/import to XML.
 */
// Round-trips a document through toXml() and a temp file, then checks that the
// original and reloaded annotations agree, including those referenced by the
// "matches" feature.
// NOTE(review): this listing has lost its closing braces and several
// statements (flagged inline below), so it does not compile as shown.
public void testAnnotationConsistencyForSaveAsXml() throws Exception {
// Load a document from the test repository
// Document origDoc = gate.Factory.newDocument(Gate.getUrl("tests/xml/gateTestSaveAsXML.xml"));
// NOTE(review): the resource name passed here is an empty string; the path
// was presumably lost from this listing (the commented-out line above names
// tests/xml/gateTestSaveAsXML.xml) -- confirm against the upstream source.
String testDoc = gate.util.Files.getGateResourceAsString("");
Document origDoc = gate.Factory.newDocument(testDoc);
// Verifies if the maximum annotation ID on the origDoc is less than the
// Annotation ID generator of the document.
// NOTE(review): no statement performing that check is visible here;
// presumably it was lost from this listing -- confirm.
// create a couple of annotations with features we can look at after a round trip to disc
Integer ann1ID = origDoc.getAnnotations().add(0L, 10L, "Test", Factory.newFeatureMap());
Integer ann2ID = origDoc.getAnnotations().add(15L, 20L, "Test", Factory.newFeatureMap());
// make the two annotations refer to each other through their "matches" feature
origDoc.getAnnotations().get(ann1ID).getFeatures().put("matches", Arrays.asList(new Integer[] { ann2ID }));
origDoc.getAnnotations().get(ann2ID).getFeatures().put("matches", Arrays.asList(new Integer[] { ann1ID }));
// Save as XML and reload the document into another GATE doc
// Export the Gate document called origDoc as XML, into a temp file,
// using the working encoding
File xmlFile = Files.writeTempFile(origDoc.toXml(), workingEncoding);
System.out.println("Saved to temp file :" + xmlFile.toURI().toURL());
Document reloadedDoc = gate.Factory.newDocument(xmlFile.toURI().toURL(), workingEncoding);
// Verifies if the maximum annotation ID on the origDoc is less than the
// Annotation ID generator of the document.
// NOTE(review): as above, no statement performing that check is visible here.
// Verify if the annotations are identical in the two docs.
Map<Integer, Annotation> origAnnotMap = buildID2AnnotMap(origDoc);
Map<Integer, Annotation> reloadedAnnMap = buildID2AnnotMap(reloadedDoc);
// Verifies if the reloaded annotations are the same as the original ones
verifyIDConsistency(origAnnotMap, reloadedAnnMap);
// Build the original Matches map
// ID -> List of IDs
Map<Integer, List<Integer>> origMatchesMap = buildMatchesMap(origDoc);
// Compare each annotation that has a matches list with its counterpart
// extracted from the reloaded map
for (Iterator<Integer> it = origMatchesMap.keySet().iterator(); it.hasNext(); ) {
// NOTE(review): the initializer is missing here; presumably it.next() -- confirm.
Integer id =;
Annotation origAnnot = origAnnotMap.get(id);
assertTrue("Couldn't find an original annot with ID=" + id, origAnnot != null);
Annotation reloadedAnnot = reloadedAnnMap.get(id);
assertTrue("Couldn't find a reloaded annot with ID=" + id, reloadedAnnot != null);
compareAnnot(origAnnot, reloadedAnnot);
// Iterate through the matches list and repeat the comparison
List<Integer> matchesList = origMatchesMap.get(id);
for (Iterator<Integer> itList = matchesList.iterator(); itList.hasNext(); ) {
// NOTE(review): the initializer is missing here; presumably itList.next() -- confirm.
Integer matchId =;
Annotation origA = origAnnotMap.get(matchId);
assertTrue("Couldn't find an original annot with ID=" + matchId, origA != null);
Annotation reloadedA = reloadedAnnMap.get(matchId);
assertTrue("Couldn't find a reloaded annot with ID=" + matchId, reloadedA != null);
compareAnnot(origA, reloadedA);
// End for
// End for
// Clean up the XML file
// NOTE(review): no cleanup statement is visible after this comment;
// presumably the temp file deletion was lost from this listing -- confirm.
use of gate.Annotation in project gate-core by GateNLP.
the class TestAnnotationMerging method testWithfeat.
/** The actual method for testing. */
public void testWithfeat(String nameAnnSets, String nameAnnType, String nameAnnFeat, Corpus data, boolean isUsingMajority) {
// get the annotation sets
String[] annSetsN = nameAnnSets.split(";");
int numJudges = annSetsN.length;
int numDocs = data.size();
AnnotationSet[][] annArr2 = new AnnotationSet[numDocs][numJudges];
for (int i = 0; i < numDocs; ++i) {
Document doc = data.get(i);
for (int j = 0; j < numJudges; ++j) {
// Get the annotation
annArr2[i][j] = doc.getAnnotations(annSetsN[j]).get(nameAnnType);
// Annotation merging
boolean isTheSameInstances = true;
for (int i = 0; i < annArr2.length; ++i) if (!AnnotationMerging.isSameInstancesForAnnotators(annArr2[i], 1)) {
isTheSameInstances = false;
HashMap<Annotation, String> mergeInfor = new HashMap<Annotation, String>();
if (isUsingMajority)
AnnotationMerging.mergeAnnotationMajority(annArr2[0], nameAnnFeat, mergeInfor, isTheSameInstances);
AnnotationMerging.mergeAnnotation(annArr2[0], nameAnnFeat, mergeInfor, 2, isTheSameInstances);
int numAnns = 0;
if (isTheSameInstances) {
for (Annotation ann : mergeInfor.keySet()) {
if (ann.getFeatures().get(nameAnnFeat) != null)
} else {
numAnns = mergeInfor.size();