Search in sources :

Example 11 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class TestDiffer method testDiffer.

// tearDown
/**
 * Checks the precision and recall figures reported by {@code AnnotationDiffer}
 * in three situations built from 100 synthetic "Foo" annotations, each
 * spanning ten offsets of the test document:
 * <ol>
 * <li>key and response are identical: strict and lenient precision and
 * recall must all be exactly 1;</li>
 * <li>the response holds one extra annotation (offsets 2-4): strict
 * precision must be .99 (tolerance .001) while recall stays at 1;</li>
 * <li>the extra annotation is moved from the response to the key: strict
 * recall must be .99 (tolerance .001) while precision stays at 1.</li>
 * </ol>
 *
 * @throws Exception if the test document cannot be loaded or an
 *           annotation cannot be created
 */
public void testDiffer() throws Exception {
    Document doc = Factory.newDocument(new URL(gate.corpora.TestDocument.getTestServerName() + "tests/ft-bt-03-aug-2001.html"), "windows-1252");
    AnnotationSet annSet = doc.getAnnotations();
    // create 100 annotations; all of them share the same feature map
    FeatureMap features = Factory.newFeatureMap();
    features.put("type", "BAR");
    for (int i = 0; i < 100; i++) {
        // Long.valueOf replaces the deprecated new Long(...) constructor
        annSet.add(Long.valueOf(i * 10L), Long.valueOf((i + 1) * 10L), "Foo", features);
    }
    // key and response start out as identical copies of the set
    List<Annotation> keySet = new ArrayList<Annotation>(annSet);
    List<Annotation> responseSet = new ArrayList<Annotation>(annSet);
    // check 100% Precision and recall
    AnnotationDiffer differ = new AnnotationDiffer();
    differ.setSignificantFeaturesSet(null);
    differ.calculateDiff(keySet, responseSet);
    differ.sanityCheck();
    if (DEBUG) {
        differ.printMissmatches();
    }
    double value = differ.getPrecisionStrict();
    Assert.assertEquals("Precision Strict: " + value + " instead of 1!", 1, value, 0);
    value = differ.getRecallStrict();
    Assert.assertEquals("Recall Strict: " + value + " instead of 1!", 1, value, 0);
    value = differ.getPrecisionLenient();
    Assert.assertEquals("Precision Lenient: " + value + " instead of 1!", 1, value, 0);
    value = differ.getRecallLenient();
    Assert.assertEquals("Recall Lenient: " + value + " instead of 1!", 1, value, 0);
    // check low precision: one extra annotation present in the response only
    Integer id = annSet.add(Long.valueOf(2), Long.valueOf(4), "Foo", features);
    Annotation falsePositive = annSet.get(id);
    responseSet.add(falsePositive);
    differ.calculateDiff(keySet, responseSet);
    differ.sanityCheck();
    if (DEBUG) {
        differ.printMissmatches();
    }
    value = differ.getPrecisionStrict();
    Assert.assertEquals("Precision Strict: " + value + " instead of .99!", .99, value, .001);
    // recall should still be 100%
    value = differ.getRecallStrict();
    Assert.assertEquals("Recall Strict: " + value + " instead of 1!", 1, value, 0);
    value = differ.getRecallLenient();
    Assert.assertEquals("Recall Lenient: " + value + " instead of 1!", 1, value, 0);
    // check low recall: the extra annotation now exists in the key only
    responseSet.remove(falsePositive);
    keySet.add(falsePositive);
    differ.calculateDiff(keySet, responseSet);
    differ.sanityCheck();
    if (DEBUG) {
        differ.printMissmatches();
    }
    value = differ.getRecallStrict();
    Assert.assertEquals("Recall Strict: " + value + " instead of .99!", .99, value, .001);
    // precision should still be 100%
    value = differ.getPrecisionStrict();
    Assert.assertEquals("Precision Strict: " + value + " instead of 1!", 1, value, 0);
    value = differ.getPrecisionLenient();
    Assert.assertEquals("Precision Lenient: " + value + " instead of 1!", 1, value, 0);
}
Also used : FeatureMap(gate.FeatureMap) ArrayList(java.util.ArrayList) AnnotationSet(gate.AnnotationSet) Document(gate.Document) URL(java.net.URL) Annotation(gate.Annotation)

Example 12 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class TestXml method verifyIDConsistency.

// End of verifyAnnotationIDGenerator()
/**
 * Asserts that both maps contain the same number of entries and that every
 * ID present in the original map is also present in the reloaded one; each
 * matching pair is then handed to {@code compareAnnot}. According to the
 * original author's note, features are the one thing left unchecked, since
 * some may be lost during serialization/deserialization.
 *
 * @param origAnnotMap A map by ID, containing the original annotations
 * @param reloadedAnnMap A map by ID, containing the recreated annotations
 */
private void verifyIDConsistency(Map<Integer, Annotation> origAnnotMap, Map<Integer, Annotation> reloadedAnnMap) {
    assertEquals("Found a different number of annot in both documents.", origAnnotMap.size(), reloadedAnnMap.size());
    for (Map.Entry<Integer, Annotation> original : origAnnotMap.entrySet()) {
        Integer annotId = original.getKey();
        Annotation reloaded = reloadedAnnMap.get(annotId);
        // a missing counterpart fails here, before the pair is compared
        assertTrue("Annotation with ID=" + annotId + " was not found in the reloaded document.", reloaded != null);
        compareAnnot(original.getValue(), reloaded);
    }
}
Also used : Annotation(gate.Annotation)

Example 13 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class DocumentStaxUtils method readXces.

/**
 * Read XML data in <a href="http://www.xces.org/">XCES</a> format
 * from the given reader and add the corresponding annotations to the
 * given annotation set. The reader must be positioned on the starting
 * <code>cesAna</code> tag and will be left pointing to the
 * corresponding end tag.
 * <p>
 * Annotations whose <code>struct</code> element carries an
 * <code>n</code> attribute are added first, under that ID; the
 * remaining ones are added afterwards through the ID-less
 * <code>add</code> overload. Nothing is added to the set until the
 * whole <code>cesAna</code> element has been read.
 *
 * @param xsr the XMLStreamReader to read from.
 * @param as the annotation set to read into.
 * @throws XMLStreamException if the reader is not positioned on the
 *           expected elements, if <code>struct/@from</code>,
 *           <code>struct/@to</code> or <code>struct/@n</code> is not
 *           an integer, if an annotation ID duplicates one already
 *           seen (including IDs already present in <code>as</code>),
 *           or if an annotation has invalid offsets. Where one
 *           exists, the underlying exception is attached as the
 *           cause.
 */
public static void readXces(XMLStreamReader xsr, AnnotationSet as) throws XMLStreamException {
    xsr.require(XMLStreamConstants.START_ELEMENT, XCES_NAMESPACE, "cesAna");
    // Set of all annotation IDs in this set.
    Set<Integer> allAnnotIds = new TreeSet<Integer>();
    // pre-populate with the IDs of any existing annotations in the set
    for (Annotation existing : as) {
        allAnnotIds.add(existing.getId());
    }
    // lists to collect the annotations in before adding them to the
    // set. We collect the annotations that specify an ID (via
    // struct/@n) in one list and those that don't in another, so we can
    // add the identified ones first, then the others will take the next
    // available ID
    List<AnnotationObject> collectedIdentifiedAnnots = new ArrayList<AnnotationObject>();
    List<AnnotationObject> collectedNonIdentifiedAnnots = new ArrayList<AnnotationObject>();
    while (xsr.nextTag() == XMLStreamConstants.START_ELEMENT) {
        xsr.require(XMLStreamConstants.START_ELEMENT, XCES_NAMESPACE, "struct");
        AnnotationObject annObj = new AnnotationObject();
        annObj.setElemName(xsr.getAttributeValue(null, "type"));
        // a missing attribute yields null, which Long.valueOf also rejects
        // with a NumberFormatException
        try {
            annObj.setStart(Long.valueOf(xsr.getAttributeValue(null, "from")));
        } catch (NumberFormatException nfe) {
            // keep the parse failure as the cause so the offending value is not lost
            throw new XMLStreamException("Non-integer value found for struct/@from", xsr.getLocation(), nfe);
        }
        try {
            annObj.setEnd(Long.valueOf(xsr.getAttributeValue(null, "to")));
        } catch (NumberFormatException nfe) {
            throw new XMLStreamException("Non-integer value found for struct/@to", xsr.getLocation(), nfe);
        }
        String annotIdString = xsr.getAttributeValue(null, "n");
        if (annotIdString != null) {
            try {
                Integer annotationId = Integer.valueOf(annotIdString);
                if (allAnnotIds.contains(annotationId)) {
                    throw new XMLStreamException("Annotation IDs must be unique " + "within an annotation set. Found duplicate ID", xsr.getLocation());
                }
                allAnnotIds.add(annotationId);
                annObj.setId(annotationId);
            } catch (NumberFormatException nfe) {
                throw new XMLStreamException("Non-integer annotation ID found", xsr.getLocation(), nfe);
            }
        }
        // get the features of this annotation
        annObj.setFM(readXcesFeatureMap(xsr));
        // NOTE(review): readXcesFeatureMap presumably leaves xsr on the
        // closing </struct> tag, so that the nextTag() call in the loop
        // condition sees either the next <struct> or </cesAna> - confirm
        // against readXcesFeatureMap
        if (annObj.getId() != null) {
            collectedIdentifiedAnnots.add(annObj);
        } else {
            collectedNonIdentifiedAnnots.add(annObj);
        }
    }
    // finished reading, add the annotations to the set.
    // "current" is tracked outside the loops so that the error message
    // can name the annotation that was being added when the failure occurred
    AnnotationObject current = null;
    try {
        // first the ones that specify an ID
        for (AnnotationObject identified : collectedIdentifiedAnnots) {
            current = identified;
            as.add(current.getId(), current.getStart(), current.getEnd(), current.getElemName(), current.getFM());
        }
        // next the ones that don't
        for (AnnotationObject anonymous : collectedNonIdentifiedAnnots) {
            current = anonymous;
            as.add(current.getStart(), current.getEnd(), current.getElemName(), current.getFM());
        }
    } catch (InvalidOffsetException ioe) {
        throw new XMLStreamException("Invalid offset when creating annotation " + current, ioe);
    }
}
Also used : XMLStreamException(javax.xml.stream.XMLStreamException) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) InvalidOffsetException(gate.util.InvalidOffsetException) Annotation(gate.Annotation)

Example 14 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class DocumentStaxUtils method writeDocument.

/**
 * Write the specified GATE Document to an XMLStreamWriter. This
 * method writes just the GateDocument element - the XML declaration
 * must be filled in by the caller if required.
 * <p>
 * Output order: the document features, the text with nodes, the
 * default annotation set (when present), every named annotation set,
 * and finally one relation set per key of <code>annotationSets</code>.
 *
 * @param doc the Document to write
 * @param annotationSets the annotations to include. If the map
 *          contains an entry for the key <code>null</code>, this
 *          will be treated as the default set. All other entries are
 *          treated as named annotation sets.
 * @param xsw the StAX XMLStreamWriter to use for output
 * @param namespaceURI the namespace used for the elements written
 *          and set as the writer's default namespace; a default
 *          namespace declaration is only emitted when this string is
 *          non-empty
 * @throws XMLStreamException if an error occurs during writing
 */
public static void writeDocument(Document doc, Map<String, Collection<Annotation>> annotationSets, XMLStreamWriter xsw, String namespaceURI) throws XMLStreamException {
    // root element
    xsw.setDefaultNamespace(namespaceURI);
    xsw.writeStartElement(namespaceURI, "GateDocument");
    xsw.writeAttribute("version", GATE_XML_VERSION);
    if (namespaceURI.length() > 0) {
        xsw.writeDefaultNamespace(namespaceURI);
    }
    newLine(xsw);

    // document features
    xsw.writeComment(" The document's features");
    newLine(xsw);
    newLine(xsw);
    xsw.writeStartElement(namespaceURI, "GateDocumentFeatures");
    newLine(xsw);
    writeFeatures(doc.getFeatures(), xsw, namespaceURI);
    // closes GateDocumentFeatures
    xsw.writeEndElement();
    newLine(xsw);

    // text with nodes
    xsw.writeComment(" The document content area with serialized nodes ");
    newLine(xsw);
    newLine(xsw);
    writeTextWithNodes(doc, annotationSets.values(), xsw, namespaceURI);
    newLine(xsw);

    // optional listener that is told which set is being saved
    StatusListener statusListener = (StatusListener) gate.Gate.getListeners().get("gate.event.StatusListener");

    // the default annotation set is stored under the null key
    if (annotationSets.containsKey(null)) {
        if (statusListener != null) {
            statusListener.statusChanged("Saving the default annotation set ");
        }
        xsw.writeComment(" The default annotation set ");
        newLine(xsw);
        newLine(xsw);
        writeAnnotationSet(annotationSets.get(null), null, xsw, namespaceURI);
        newLine(xsw);
    }

    // the named annotation sets
    for (Map.Entry<String, Collection<Annotation>> namedSet : annotationSets.entrySet()) {
        String setName = namedSet.getKey();
        if (setName == null) {
            // the default set has already been written above
            continue;
        }
        Collection<Annotation> setContents = namedSet.getValue();
        xsw.writeComment(" Named annotation set ");
        newLine(xsw);
        newLine(xsw);
        if (statusListener != null) {
            statusListener.statusChanged("Saving " + setName + " annotation set ");
        }
        writeAnnotationSet(setContents, setName, xsw, namespaceURI);
        newLine(xsw);
    }

    // relation sets, looked up on the document for every key (the null key included)
    for (String relationOwner : annotationSets.keySet()) {
        writeRelationSet(doc.getAnnotations(relationOwner).getRelations(), xsw, namespaceURI);
    }

    // close the GateDocument element
    xsw.writeEndElement();
    newLine(xsw);
}
Also used : Collection(java.util.Collection) StatusListener(gate.event.StatusListener) HashMap(java.util.HashMap) Map(java.util.Map) FeatureMap(gate.FeatureMap) Annotation(gate.Annotation)

Example 15 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class AnnotationSetImpl method getContained.

// get(type, constraints, offset)
/**
 * Select annotations contained within an interval, i.e.
 * those annotations whose start position is
 * &gt;= <code>startOffset</code> and whose end position is &lt;=
 * <code>endOffset</code>.
 * <p>
 * Candidates are taken from the nodes that <code>nodesByOffset.subMap</code>
 * returns for the two offsets; as the original comment puts it, these are
 * the annotations starting at or after the start offset but strictly
 * before the end offset.
 *
 * @param startOffset lower bound of the interval
 * @param endOffset upper bound of the interval
 * @return the result of <code>emptyAS()</code> when
 *         <code>endOffset</code> is smaller than
 *         <code>startOffset</code>, otherwise an
 *         <code>ImmutableAnnotationSetImpl</code> built from the
 *         matching annotations
 */
@Override
public AnnotationSet getContained(Long startOffset, Long endOffset) {
    // an inverted interval cannot contain anything
    if (endOffset < startOffset) {
        return emptyAS();
    }
    // build the start-node index on first use
    if (annotsByStartNode == null) {
        indexByStartOffset();
    }
    // created on the first match only; when nothing matches, null is what
    // gets passed on to the ImmutableAnnotationSetImpl constructor
    List<Annotation> contained = null;
    for (Iterator<Node> nodes = nodesByOffset.subMap(startOffset, endOffset).values().iterator(); nodes.hasNext(); ) {
        Collection<Annotation> startingHere = getAnnotsByStartNode(nodes.next().getId());
        if (startingHere != null) {
            for (Annotation candidate : startingHere) {
                // skip anything that runs past the end of the interval
                if (candidate.getEndNode().getOffset().compareTo(endOffset) > 0) {
                    continue;
                }
                if (contained == null) {
                    contained = new ArrayList<Annotation>();
                }
                contained.add(candidate);
            }
        }
    }
    return new ImmutableAnnotationSetImpl(doc, contained);
}
Also used : Node(gate.Node) ArrayList(java.util.ArrayList) Annotation(gate.Annotation)

Aggregations

Annotation (gate.Annotation) 69, AnnotationSet (gate.AnnotationSet) 28, ArrayList (java.util.ArrayList) 24, HashMap (java.util.HashMap) 15, Node (gate.Node) 10, HashSet (java.util.HashSet) 10, List (java.util.List) 10, FeatureMap (gate.FeatureMap) 8, Map (java.util.Map) 8, TreeSet (java.util.TreeSet) 8, Document (gate.Document) 7, InvalidOffsetException (gate.util.InvalidOffsetException) 7, Point (java.awt.Point) 6, LinkedList (java.util.LinkedList) 5, Set (java.util.Set) 5, StatusListener (gate.event.StatusListener) 4, GateRuntimeException (gate.util.GateRuntimeException) 3, Color (java.awt.Color) 3, Stack (java.util.Stack) 3, TreeMap (java.util.TreeMap) 3