use of gate.Annotation in project gate-core by GateNLP.
the class TestDiffer method testDiffer.
// tearDown
/**
 * Checks the precision and recall figures reported by
 * {@link AnnotationDiffer} in three scenarios built from the same 100
 * "Foo" annotations (each covering 10 characters, laid end to end):
 * <ol>
 * <li>key and response are identical - every measure must be exactly 1;</li>
 * <li>the response holds one extra annotation - strict precision drops
 * to about .99 while recall stays at 1;</li>
 * <li>the key holds one extra annotation - strict recall drops to about
 * .99 while precision stays at 1.</li>
 * </ol>
 *
 * @throws Exception if the test document cannot be created or an
 *           annotation cannot be added to it
 */
public void testDiffer() throws Exception {
  Document doc = Factory.newDocument(new URL(gate.corpora.TestDocument.getTestServerName() + "tests/ft-bt-03-aug-2001.html"), "windows-1252");
  AnnotationSet annSet = doc.getAnnotations();
  // create 100 annotations, all sharing the same feature map
  FeatureMap features = Factory.newFeatureMap();
  features.put("type", "BAR");
  for (int i = 0; i < 100; i++) {
    // Long.valueOf replaces the deprecated Long(long) constructor
    annSet.add(Long.valueOf(i * 10L), Long.valueOf((i + 1) * 10L), "Foo", features);
  }
  List<Annotation> keySet = new ArrayList<Annotation>(annSet);
  List<Annotation> responseSet = new ArrayList<Annotation>(annSet);

  // check 100% Precision and recall
  AnnotationDiffer differ = new AnnotationDiffer();
  differ.setSignificantFeaturesSet(null);
  differ.calculateDiff(keySet, responseSet);
  differ.sanityCheck();
  if (DEBUG) {
    differ.printMissmatches();
  }
  double value = differ.getPrecisionStrict();
  Assert.assertEquals("Precision Strict: " + value + " instead of 1!", 1, value, 0);
  value = differ.getRecallStrict();
  Assert.assertEquals("Recall Strict: " + value + " instead of 1!", 1, value, 0);
  value = differ.getPrecisionLenient();
  Assert.assertEquals("Precision Lenient: " + value + " instead of 1!", 1, value, 0);
  value = differ.getRecallLenient();
  Assert.assertEquals("Recall Lenient: " + value + " instead of 1!", 1, value, 0);

  // check low precision: one extra annotation that exists only in the response
  Integer id = annSet.add(Long.valueOf(2L), Long.valueOf(4L), "Foo", features);
  Annotation falsePositive = annSet.get(id);
  responseSet.add(falsePositive);
  differ.calculateDiff(keySet, responseSet);
  differ.sanityCheck();
  if (DEBUG) {
    differ.printMissmatches();
  }
  value = differ.getPrecisionStrict();
  Assert.assertEquals("Precision Strict: " + value + " instead of .99!", .99, value, .001);
  // recall should still be 100%
  value = differ.getRecallStrict();
  Assert.assertEquals("Recall Strict: " + value + " instead of 1!", 1, value, 0);
  value = differ.getRecallLenient();
  Assert.assertEquals("Recall Lenient: " + value + " instead of 1!", 1, value, 0);

  // check low recall: move the extra annotation from the response to the key
  responseSet.remove(falsePositive);
  keySet.add(falsePositive);
  differ.calculateDiff(keySet, responseSet);
  differ.sanityCheck();
  if (DEBUG) {
    differ.printMissmatches();
  }
  value = differ.getRecallStrict();
  Assert.assertEquals("Recall Strict: " + value + " instead of .99!", .99, value, .001);
  // precision should still be 100%
  value = differ.getPrecisionStrict();
  Assert.assertEquals("Precision Strict: " + value + " instead of 1!", 1, value, 0);
  value = differ.getPrecisionLenient();
  Assert.assertEquals("Precision Lenient: " + value + " instead of 1!", 1, value, 0);
}
use of gate.Annotation in project gate-core by GateNLP.
the class TestXml method verifyIDConsistency.
// End of verifyAnnotationIDGenerator()
/**
 * Asserts that both maps contain the same number of annotations and
 * that every ID present in the original map is also present in the
 * reloaded one; each matching pair is then handed to
 * <code>compareAnnot</code>. Features are not compared here, as some of
 * them could be lost in the serialization/deserialization process.
 *
 * @param origAnnotMap A map by ID, containing the original annotations
 * @param reloadedAnnMap A map by ID, containing the recreated annotations
 */
private void verifyIDConsistency(Map<Integer, Annotation> origAnnotMap, Map<Integer, Annotation> reloadedAnnMap) {
  assertEquals("Found a different number of annot in both documents.", origAnnotMap.size(), reloadedAnnMap.size());
  for (Map.Entry<Integer, Annotation> original : origAnnotMap.entrySet()) {
    Integer annotId = original.getKey();
    Annotation reloaded = reloadedAnnMap.get(annotId);
    assertTrue("Annotation with ID=" + annotId + " was not found in the reloaded document.", reloaded != null);
    compareAnnot(original.getValue(), reloaded);
  }
}
use of gate.Annotation in project gate-core by GateNLP.
the class DocumentStaxUtils method readXces.
/**
 * Read XML data in <a href="http://www.xces.org/">XCES</a> format
 * from the given reader and add the corresponding annotations to the
 * given annotation set. The reader must be positioned on the starting
 * <code>cesAna</code> tag and will be left pointing to the
 * corresponding end tag.
 * <p>
 * Each <code>struct</code> child becomes one annotation: its
 * <code>type</code> attribute gives the annotation type,
 * <code>from</code>/<code>to</code> give the offsets and the optional
 * <code>n</code> attribute gives the annotation ID. Annotations that
 * specify an ID are added first; the remaining ones are added
 * afterwards without an explicit ID.
 *
 * @param xsr the XMLStreamReader to read from.
 * @param as the annotation set to read into.
 * @throws XMLStreamException if the reader is not positioned on the
 *           expected element, if <code>from</code>, <code>to</code> or
 *           <code>n</code> is not an integer, if an ID duplicates one
 *           already seen (in the set or in the stream), or if the
 *           offsets are rejected when the annotation is added.
 */
public static void readXces(XMLStreamReader xsr, AnnotationSet as) throws XMLStreamException {
  xsr.require(XMLStreamConstants.START_ELEMENT, XCES_NAMESPACE, "cesAna");
  // Set of all annotation IDs in this set, pre-populated with the IDs
  // of any annotations that already exist in it.
  Set<Integer> allAnnotIds = new TreeSet<Integer>();
  for (Annotation existing : as) {
    allAnnotIds.add(existing.getId());
  }
  // Lists to collect the annotations in before adding them to the
  // set. We collect the annotations that specify an ID (via
  // struct/@n) in one list and those that don't in another, so we can
  // add the identified ones first, then the others will take the next
  // available ID.
  List<AnnotationObject> collectedIdentifiedAnnots = new ArrayList<AnnotationObject>();
  List<AnnotationObject> collectedNonIdentifiedAnnots = new ArrayList<AnnotationObject>();
  while (xsr.nextTag() == XMLStreamConstants.START_ELEMENT) {
    xsr.require(XMLStreamConstants.START_ELEMENT, XCES_NAMESPACE, "struct");
    AnnotationObject annObj = new AnnotationObject();
    annObj.setElemName(xsr.getAttributeValue(null, "type"));
    try {
      annObj.setStart(Long.valueOf(xsr.getAttributeValue(null, "from")));
    } catch (NumberFormatException nfe) {
      // keep the NumberFormatException as the cause so the offending value is not lost
      throw new XMLStreamException("Non-integer value found for struct/@from", xsr.getLocation(), nfe);
    }
    try {
      annObj.setEnd(Long.valueOf(xsr.getAttributeValue(null, "to")));
    } catch (NumberFormatException nfe) {
      throw new XMLStreamException("Non-integer value found for struct/@to", xsr.getLocation(), nfe);
    }
    String annotIdString = xsr.getAttributeValue(null, "n");
    if (annotIdString != null) {
      Integer annotationId;
      try {
        annotationId = Integer.valueOf(annotIdString);
      } catch (NumberFormatException nfe) {
        throw new XMLStreamException("Non-integer annotation ID found", xsr.getLocation(), nfe);
      }
      // add() reports false when the ID was already present
      if (!allAnnotIds.add(annotationId)) {
        throw new XMLStreamException("Annotation IDs must be unique within an annotation set. Found duplicate ID", xsr.getLocation());
      }
      annObj.setId(annotationId);
    }
    // get the features of this annotation
    annObj.setFM(readXcesFeatureMap(xsr));
    // The loop condition's nextTag() then moves on to the next struct or
    // to the closing cesAna tag, so readXcesFeatureMap is presumably
    // expected to leave xsr on the </struct> tag - verify against that method.
    if (annObj.getId() != null) {
      collectedIdentifiedAnnots.add(annObj);
    } else {
      collectedNonIdentifiedAnnots.add(annObj);
    }
  }
  // finished reading, add the annotations to the set
  // 'current' is tracked outside the loops so the error message can name
  // the annotation that was being added when the failure happened
  AnnotationObject current = null;
  try {
    // first the ones that specify an ID
    for (AnnotationObject identified : collectedIdentifiedAnnots) {
      current = identified;
      as.add(current.getId(), current.getStart(), current.getEnd(), current.getElemName(), current.getFM());
    }
    // next the ones that don't
    for (AnnotationObject anonymous : collectedNonIdentifiedAnnots) {
      current = anonymous;
      as.add(current.getStart(), current.getEnd(), current.getElemName(), current.getFM());
    }
  } catch (InvalidOffsetException ioe) {
    throw new XMLStreamException("Invalid offset when creating annotation " + current, ioe);
  }
}
use of gate.Annotation in project gate-core by GateNLP.
the class DocumentStaxUtils method writeDocument.
/**
 * Write the specified GATE Document to an XMLStreamWriter. This
 * method writes just the GateDocument element - the XML declaration
 * must be filled in by the caller if required.
 * <p>
 * The output consists, in order, of the document features, the text
 * with nodes, the default annotation set (if supplied), every named
 * annotation set, and finally the relation sets of each annotation
 * set named in the map.
 *
 * @param doc the Document to write
 * @param annotationSets the annotations to include. If the map
 *          contains an entry for the key <code>null</code>, this
 *          will be treated as the default set. All other entries are
 *          treated as named annotation sets.
 * @param xsw the StAX XMLStreamWriter to use for output
 * @param namespaceURI the namespace URI passed to every element that
 *          is written; when non-empty it is also declared as the
 *          default namespace on the GateDocument element
 * @throws XMLStreamException if an error occurs during writing
 */
public static void writeDocument(Document doc, Map<String, Collection<Annotation>> annotationSets, XMLStreamWriter xsw, String namespaceURI) throws XMLStreamException {
  // root element
  xsw.setDefaultNamespace(namespaceURI);
  xsw.writeStartElement(namespaceURI, "GateDocument");
  xsw.writeAttribute("version", GATE_XML_VERSION);
  if (!namespaceURI.isEmpty()) {
    xsw.writeDefaultNamespace(namespaceURI);
  }
  newLine(xsw);

  // document features
  xsw.writeComment(" The document's features");
  newLine(xsw);
  newLine(xsw);
  xsw.writeStartElement(namespaceURI, "GateDocumentFeatures");
  newLine(xsw);
  writeFeatures(doc.getFeatures(), xsw, namespaceURI);
  xsw.writeEndElement(); // GateDocumentFeatures
  newLine(xsw);

  // text with nodes
  xsw.writeComment(" The document content area with serialized nodes ");
  newLine(xsw);
  newLine(xsw);
  writeTextWithNodes(doc, annotationSets.values(), xsw, namespaceURI);
  newLine(xsw);

  // progress messages go to the registered status listener, if there is one
  StatusListener statusListener = (StatusListener) gate.Gate.getListeners().get("gate.event.StatusListener");

  // the default annotation set is stored under the null key
  if (annotationSets.containsKey(null)) {
    if (statusListener != null) {
      statusListener.statusChanged("Saving the default annotation set ");
    }
    xsw.writeComment(" The default annotation set ");
    newLine(xsw);
    newLine(xsw);
    writeAnnotationSet(annotationSets.get(null), null, xsw, namespaceURI);
    newLine(xsw);
  }

  // the named annotation sets
  for (Map.Entry<String, Collection<Annotation>> namedSet : annotationSets.entrySet()) {
    String setName = namedSet.getKey();
    if (setName == null) {
      // the default set has already been written above
      continue;
    }
    Collection<Annotation> setAnnotations = namedSet.getValue();
    xsw.writeComment(" Named annotation set ");
    newLine(xsw);
    newLine(xsw);
    if (statusListener != null) {
      statusListener.statusChanged("Saving " + setName + " annotation set ");
    }
    writeAnnotationSet(setAnnotations, setName, xsw, namespaceURI);
    newLine(xsw);
  }

  // relation sets, looked up on the document itself for every key in the map
  for (String setName : annotationSets.keySet()) {
    writeRelationSet(doc.getAnnotations(setName).getRelations(), xsw, namespaceURI);
  }

  // close the GateDocument element
  xsw.writeEndElement();
  newLine(xsw);
}
use of gate.Annotation in project gate-core by GateNLP.
the class AnnotationSetImpl method getContained.
// get(type, constraints, offset)
/**
 * Select annotations contained within an interval, i.e.
 * those annotations whose start position is
 * &gt;= <code>startOffset</code> and whose end position is &lt;=
 * <code>endOffset</code>.
 * <p>
 * Start nodes are taken from the sub-map between
 * <code>startOffset</code> and <code>endOffset</code>, so only
 * annotations starting strictly before <code>endOffset</code> are
 * considered. If <code>endOffset</code> is smaller than
 * <code>startOffset</code> the result of <code>emptyAS()</code> is
 * returned.
 *
 * @param startOffset the lower bound of the interval
 * @param endOffset the upper bound of the interval
 * @return an immutable annotation set holding the selected annotations
 */
@Override
public AnnotationSet getContained(Long startOffset, Long endOffset) {
  // an inverted range cannot contain anything
  if (endOffset < startOffset) {
    return emptyAS();
  }
  // make sure the start-node index exists before it is queried
  if (annotsByStartNode == null) {
    indexByStartOffset();
  }

  // stays null until the first match is found
  List<Annotation> contained = null;

  // visit every node at or after the start offset but strictly before
  // the end offset
  Iterator<Node> startNodes = nodesByOffset.subMap(startOffset, endOffset).values().iterator();
  while (startNodes.hasNext()) {
    Node startNode = startNodes.next();
    Collection<Annotation> startingHere = getAnnotsByStartNode(startNode.getId());
    if (startingHere == null) {
      continue;
    }
    // keep only those annotations that also end at or before endOffset
    for (Annotation candidate : startingHere) {
      if (candidate.getEndNode().getOffset().compareTo(endOffset) > 0) {
        continue;
      }
      if (contained == null) {
        contained = new ArrayList<Annotation>();
      }
      contained.add(candidate);
    }
  }
  return new ImmutableAnnotationSetImpl(doc, contained);
}
Aggregations