Search in sources :

Example 6 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class CorpusBenchmarkTool method printAnnotations.

/**
 * Prints every annotation of the given set through {@code Out.prln}, one
 * line per annotation: the document text covered by the annotation wrapped
 * in {@code <B>} tags, followed by its start and end offsets wrapped in
 * {@code <I>} tags.
 *
 * @param set the annotations to print; nothing is printed when it is
 * {@code null} or empty.
 * @param doc the document the annotations belong to; its content is used
 * to extract the covered text.
 */
protected void printAnnotations(Set<Annotation> set, Document doc) {
    if (set == null || set.isEmpty())
        return;
    // The document text does not change while printing, so build it once
    // instead of once per annotation.
    String text = doc.getContent().toString();
    for (Annotation ann : set) {
        Long start = ann.getStartNode().getOffset();
        Long end = ann.getEndNode().getOffset();
        Out.prln("<B>" + text.substring(start.intValue(), end.intValue()) + "</B>: <I>[" + start + "," + end + "]</I>");
    }
}
Also used : Annotation(gate.Annotation)

Example 7 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class CorpusBenchmarkTool method storeAnnotations.

// storeAnnotations
/**
 * Writes every annotation of the given set to {@code file}, one line per
 * annotation, as four fields separated by dots: the given type label, the
 * document text covered by the annotation, its start offset and its end
 * offset.
 *
 * @param type label written as the first field of every line.
 * @param set the annotations to store; nothing is written when it is
 * {@code null} or empty.
 * @param doc the document the annotations belong to; its content is used
 * to extract the covered text.
 * @param file the writer receiving the output; it is neither flushed nor
 * closed here.
 * @throws IOException if writing to {@code file} fails.
 */
protected void storeAnnotations(String type, Set<Annotation> set, Document doc, Writer file) throws IOException {
    if (set == null || set.isEmpty())
        return;
    // The document text does not change while writing, so build it once
    // instead of once per annotation.
    String text = doc.getContent().toString();
    for (Annotation ann : set) {
        Long start = ann.getStartNode().getOffset();
        Long end = ann.getEndNode().getOffset();
        file.write(type);
        file.write(".");
        file.write(text.substring(start.intValue(), end.intValue()));
        file.write(".");
        file.write(start.toString());
        file.write(".");
        file.write(end.toString());
        file.write("\n");
    }
}
Also used : Annotation(gate.Annotation)

Example 8 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class OntologyMeasures method calculateBdm.

/**
 * For a document get the annotation differs that contain the type to compare
 * and the annotation differs that may have miscategorized annotations
 * for this type. Then we try to find miscategorized types that are close
 * enough from the main type and use their BDM value to get an augmented
 * precision, recall and fscore.
 * <p>
 * Every missing (key) annotation is compared with every spurious (response)
 * annotation collected from all the differs. When two of them are
 * coextensive and a positive BDM score is found for the pair, the score is
 * added to the total kept in {@code bdmByTypeMap} for the key annotation
 * type and the response annotation is removed from the candidates.
 * Finally {@code differByTypeMap} is rebuilt with one combined
 * {@code AnnotationDiffer} per annotation type.
 *
 * @param differs annotation differ for the type and for possible
 * miscategorized types.
 */
public void calculateBdm(Collection<AnnotationDiffer> differs) {
    if (bdmByConceptsMap == null) {
        // load BDM file with scores for each concept/annotation type pair
        bdmByConceptsMap = read(bdmFileUrl);
    }
    // calculate BDM from the spurious and missing annotations;
    // the spurious annotations of all differs form the second set to compare
    Set<Annotation> unpairedResponseAnnotations = new HashSet<Annotation>();
    for (AnnotationDiffer differ : differs) {
        unpairedResponseAnnotations.addAll(differ.getAnnotationsOfType(AnnotationDiffer.SPURIOUS_TYPE));
    }
    bdmByTypeMap.clear();
    for (AnnotationDiffer differ : differs) {
        Set<Annotation> unpairedKeyAnnotations = differ.getAnnotationsOfType(AnnotationDiffer.MISSING_TYPE);
        if (!bdmByTypeMap.containsKey(differ.getAnnotationType())) {
            bdmByTypeMap.put(differ.getAnnotationType(), 0f);
        }
        // use the missing annotations as the first set to compare
        for (Annotation unpairedKeyAnnotation : unpairedKeyAnnotations) {
            String type = unpairedKeyAnnotation.getType();
            Iterator<Annotation> iterator = unpairedResponseAnnotations.iterator();
            // use the spurious annotations as the second set to compare
            while (iterator.hasNext()) {
                Annotation unpairedResponseAnnotation = iterator.next();
                float bdm = 0;
                // annotations have the same start and end offsets
                if (unpairedKeyAnnotation.coextensive(unpairedResponseAnnotation)) {
                    String responseType = unpairedResponseAnnotation.getType();
                    if (differ.getSignificantFeaturesSet() != null) {
                        // compare both features values with BDM pairs
                        if (!type.equals(responseType)) {
                            // types must be the same
                            continue;
                        }
                        for (Object feature : differ.getSignificantFeaturesSet()) {
                            if (unpairedKeyAnnotation.getFeatures() == null || unpairedResponseAnnotation.getFeatures() == null) {
                                continue;
                            }
                            String keyLabel = (String) unpairedKeyAnnotation.getFeatures().get(feature);
                            String responseLabel = (String) unpairedResponseAnnotation.getFeatures().get(feature);
                            if (keyLabel == null || responseLabel == null) {
                            // do nothing
                            } else if (bdmByConceptsMap.containsKey(keyLabel + ", " + responseLabel)) {
                                bdm += bdmByConceptsMap.get(keyLabel + ", " + responseLabel);
                            } else if (bdmByConceptsMap.containsKey(responseLabel + ", " + keyLabel)) {
                                bdm += bdmByConceptsMap.get(responseLabel + ", " + keyLabel);
                            }
                        }
                        // average over all the significant features, including
                        // those for which no score was found
                        bdm = bdm / differ.getSignificantFeaturesSet().size();
                    } else {
                        // compare both types with BDM pairs; every other lookup
                        // in this method builds its key with ", " so that form
                        // is tried first
                        if (bdmByConceptsMap.containsKey(type + ", " + responseType)) {
                            bdm = bdmByConceptsMap.get(type + ", " + responseType);
                        } else if (bdmByConceptsMap.containsKey(type + ',' + responseType)) {
                            // NOTE(review): key form without a space, kept from the
                            // previous implementation in case some BDM maps use it
                            bdm = bdmByConceptsMap.get(type + ',' + responseType);
                        } else if (bdmByConceptsMap.containsKey(responseType + ", " + type)) {
                            bdm = bdmByConceptsMap.get(responseType + ", " + type);
                        }
                    }
                    if (bdm > 0) {
                        // the key annotation type may have no entry yet when it
                        // differs from the differ annotation type
                        Float previous = bdmByTypeMap.get(type);
                        bdmByTypeMap.put(type, (previous == null ? 0f : previous) + bdm);
                        // a response annotation is used at most once
                        iterator.remove();
                    }
                }
            }
        }
    }
    differByTypeMap.clear();
    Map<String, List<AnnotationDiffer>> differsByTypeMap = new HashMap<String, List<AnnotationDiffer>>();
    for (AnnotationDiffer differ : differs) {
        // we consider that all annotations in AnnotationDiffer are the same type
        String type = differ.getAnnotationType();
        List<AnnotationDiffer> differsType = differsByTypeMap.get(type);
        if (differsType == null) {
            differsType = new ArrayList<AnnotationDiffer>();
            differsByTypeMap.put(type, differsType);
        }
        differsType.add(differ);
    }
    // combine the list of AnnotationDiffer for each type
    for (Map.Entry<String, List<AnnotationDiffer>> entry : differsByTypeMap.entrySet()) {
        differByTypeMap.put(entry.getKey(), new AnnotationDiffer(entry.getValue()));
    }
}
Also used : Annotation(gate.Annotation)

Example 9 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class TestXml method testAnnotationConsistencyForSaveAsXml.

// testUnpackMarkup()
/**
 * Loads the test document, adds two annotations that reference each other
 * through their "matches" feature, saves the document as GATE XML into a
 * temporary file and loads it back. All the annotations of the reloaded
 * document should be the same as the original ones.
 *
 * It also verifies that the matches feature still holds after the
 * export/import to XML.
 */
public void testAnnotationConsistencyForSaveAsXml() throws Exception {
    // Load the document from the test resources
    String testDoc = gate.util.Files.getGateResourceAsString("gate.ac.uk/tests/xml/gateTestSaveAsXML.xml");
    Document origDoc = gate.Factory.newDocument(testDoc);
    // Check the annotation ID generator of the original document
    verifyAnnotationIDGenerator(origDoc);
    // Add two annotations pointing at each other via the "matches" feature,
    // so there is something to inspect after the round trip to disc
    Integer ann1ID = origDoc.getAnnotations().add(0L, 10L, "Test", Factory.newFeatureMap());
    Integer ann2ID = origDoc.getAnnotations().add(15L, 20L, "Test", Factory.newFeatureMap());
    origDoc.getAnnotations().get(ann1ID).getFeatures().put("matches", Arrays.asList(ann2ID));
    origDoc.getAnnotations().get(ann2ID).getFeatures().put("matches", Arrays.asList(ann1ID));
    // Export origDoc as XML into a temp file using the working encoding,
    // then read that file back into a second document
    File xmlFile = Files.writeTempFile(origDoc.toXml(), workingEncoding);
    System.out.println("Saved to temp file :" + xmlFile.toURI().toURL());
    Document reloadedDoc = gate.Factory.newDocument(xmlFile.toURI().toURL(), workingEncoding);
    // Check the annotation ID generator of the reloaded document
    verifyAnnotationIDGenerator(reloadedDoc);
    // Index the annotations of both documents by ID and compare them
    Map<Integer, Annotation> origAnnotMap = buildID2AnnotMap(origDoc);
    Map<Integer, Annotation> reloadedAnnMap = buildID2AnnotMap(reloadedDoc);
    verifyIDConsistency(origAnnotMap, reloadedAnnMap);
    // Matches map of the original document: ID -> list of matching IDs
    Map<Integer, List<Integer>> origMatchesMap = buildMatchesMap(origDoc);
    for (Map.Entry<Integer, List<Integer>> matchesEntry : origMatchesMap.entrySet()) {
        Integer id = matchesEntry.getKey();
        // The annotation owning the matches list must exist on both sides
        Annotation origAnnot = origAnnotMap.get(id);
        assertTrue("Couldn't find an original annot with ID=" + id, origAnnot != null);
        Annotation reloadedAnnot = reloadedAnnMap.get(id);
        assertTrue("Couldn't find a reloaded annot with ID=" + id, reloadedAnnot != null);
        compareAnnot(origAnnot, reloadedAnnot);
        // ... and so must every annotation referenced from its matches list
        for (Integer matchId : matchesEntry.getValue()) {
            Annotation origA = origAnnotMap.get(matchId);
            assertTrue("Couldn't find an original annot with ID=" + matchId, origA != null);
            Annotation reloadedA = reloadedAnnMap.get(matchId);
            assertTrue("Couldn't find a reloaded annot with ID=" + matchId, reloadedA != null);
            compareAnnot(origA, reloadedA);
        }
    }
    // Clean up the XML file
    xmlFile.delete();
}
Also used : List(java.util.List) LinkedList(java.util.LinkedList) TestDocument(gate.corpora.TestDocument) Document(gate.Document) File(java.io.File) Annotation(gate.Annotation)

Example 10 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class TestAnnotationMerging method testWithfeat.

/**
 * The actual method for testing: merges the annotations of the first
 * document of the corpus and passes the resulting count to
 * {@code checkNumbers}.
 *
 * @param nameAnnSets names of the annotation sets to merge, separated
 * by ';' (one set per judge).
 * @param nameAnnType annotation type taken from every annotation set.
 * @param nameAnnFeat name of the annotation feature handed to the merging
 * methods.
 * @param data corpus providing the documents.
 * @param isUsingMajority {@code true} to merge with
 * {@code mergeAnnotationMajority}, {@code false} to merge with
 * {@code mergeAnnotation}.
 */
public void testWithfeat(String nameAnnSets, String nameAnnType, String nameAnnFeat, Corpus data, boolean isUsingMajority) {
    // one annotation set name per judge
    String[] judgeSetNames = nameAnnSets.split(";");
    int judgeCount = judgeSetNames.length;
    int docCount = data.size();
    // annotations of the requested type, indexed by [document][judge]
    AnnotationSet[][] annsByDocAndJudge = new AnnotationSet[docCount][judgeCount];
    for (int docIdx = 0; docIdx < docCount; docIdx++) {
        Document doc = data.get(docIdx);
        for (int judgeIdx = 0; judgeIdx < judgeCount; judgeIdx++) {
            annsByDocAndJudge[docIdx][judgeIdx] = doc.getAnnotations(judgeSetNames[judgeIdx]).get(nameAnnType);
        }
    }
    // the flag stays true only if the check succeeds for every document
    boolean isTheSameInstances = true;
    for (AnnotationSet[] judgeSets : annsByDocAndJudge) {
        if (!AnnotationMerging.isSameInstancesForAnnotators(judgeSets, 1)) {
            isTheSameInstances = false;
            break;
        }
    }
    // Annotation merging, done on the first document only
    HashMap<Annotation, String> mergeInfor = new HashMap<Annotation, String>();
    if (isUsingMajority) {
        AnnotationMerging.mergeAnnotationMajority(annsByDocAndJudge[0], nameAnnFeat, mergeInfor, isTheSameInstances);
    } else {
        AnnotationMerging.mergeAnnotation(annsByDocAndJudge[0], nameAnnFeat, mergeInfor, 2, isTheSameInstances);
    }
    int numAnns;
    if (!isTheSameInstances) {
        numAnns = mergeInfor.size();
    } else {
        // count only the merged annotations that carry the feature
        numAnns = 0;
        for (Annotation merged : mergeInfor.keySet()) {
            if (merged.getFeatures().get(nameAnnFeat) != null) {
                numAnns++;
            }
        }
    }
    checkNumbers(numAnns);
}
Also used : HashMap(java.util.HashMap) AnnotationSet(gate.AnnotationSet) Document(gate.Document) TestDocument(gate.corpora.TestDocument) Annotation(gate.Annotation)

Aggregations

Annotation (gate.Annotation)69 AnnotationSet (gate.AnnotationSet)28 ArrayList (java.util.ArrayList)24 HashMap (java.util.HashMap)15 Node (gate.Node)10 HashSet (java.util.HashSet)10 List (java.util.List)10 FeatureMap (gate.FeatureMap)8 Map (java.util.Map)8 TreeSet (java.util.TreeSet)8 Document (gate.Document)7 InvalidOffsetException (gate.util.InvalidOffsetException)7 Point (java.awt.Point)6 LinkedList (java.util.LinkedList)5 Set (java.util.Set)5 StatusListener (gate.event.StatusListener)4 GateRuntimeException (gate.util.GateRuntimeException)3 Color (java.awt.Color)3 Stack (java.util.Stack)3 TreeMap (java.util.TreeMap)3