use of gate.Annotation in project gate-core by GateNLP.
the class DocumentImpl method saveAnnotationSetAsXml.
// insertsSafety()
/**
 * Combines the document content with the annotations from
 * {@code aDumpAnnotSet} by inserting start, end and empty tags into the text
 * at the annotation offsets, and by replacing special characters with their
 * entities from {@code DocumentXmlUtils.entitiesMap}.
 * <p>
 * Side effect: annotations are removed from {@code aDumpAnnotSet} when their
 * start tag or empty tag is written.
 *
 * @param aDumpAnnotSet
 *          a GATE annotation set prepared to be used on the raw text from the
 *          document content. If it is {@code null}, the document content as
 *          returned by {@code DocumentXmlUtils.filterNonXmlChars} is returned
 *          without tags or entity replacement.
 * @param includeFeatures
 *          passed on to {@code writeStartTag}; controls whether the annotation
 *          features and gate ID are included or not.
 * @return the XML text obtained from the raw text plus the information from
 *         the dump annotation set. A document without content is treated as
 *         having empty text.
 */
@SuppressWarnings("unused")
private String saveAnnotationSetAsXml(AnnotationSet aDumpAnnotSet, boolean includeFeatures) {
  String content = null;
  if (this.getContent() == null)
    content = "";
  else
    content = this.getContent().toString();
  StringBuffer docContStrBuff = DocumentXmlUtils.filterNonXmlChars(new StringBuffer(content));
  if (aDumpAnnotSet == null)
    return docContStrBuff.toString();
  // Read the content size once. Missing content counts as size 0, matching
  // the empty-string fallback above, instead of dereferencing null later on.
  long contentSize = (this.getContent() == null) ? 0L : this.getContent().size().longValue();
  TreeMap<Long, Character> offsets2CharsMap = new TreeMap<Long, Character>();
  if (contentSize != 0) {
    // Fill the offsets2CharsMap with all the indices where
    // special chars appear
    buildEntityMapFromString(content, offsets2CharsMap);
  }
  // Initial capacity for the per-offset tag buffer (same value the size-based
  // expression produced before; only computed once now).
  int tagBufferCapacity = DOC_SIZE_MULTIPLICATION_FACTOR_AS * (int) contentSize;
  // The saving algorithm is as follows:
  // 1. Collect every start and end offset used by the annotations.
  // 2. Visit the offsets from the highest to the lowest, so that text
  //    inserted at one offset never shifts the positions still to be visited.
  // 3. At each offset write the tags of the annotations returned by
  //    getAnnotationsForOffset, in the order that list provides.
  TreeSet<Long> offsets = new TreeSet<Long>();
  Iterator<Annotation> iter = aDumpAnnotSet.iterator();
  while (iter.hasNext()) {
    Annotation annot = iter.next();
    offsets.add(annot.getStartNode().getOffset());
    offsets.add(annot.getEndNode().getOffset());
  }
  while (!offsets.isEmpty()) {
    Long offset = offsets.last();
    // Remove the offset from the set before using it
    offsets.remove(offset);
    // The annotations that need to be serialized at this offset
    List<Annotation> annotations = getAnnotationsForOffset(aDumpAnnotSet, offset);
    // Tags for this offset are collected here, left to right
    StringBuffer tmpBuff = new StringBuffer(tagBufferCapacity);
    // Holds "isEmptyAndSpan" annotations whose start tag has been written at
    // this offset but whose end tag is still pending
    Stack<Annotation> stack = new Stack<Annotation>();
    Iterator<Annotation> it = annotations.iterator();
    while (it.hasNext()) {
      Annotation a = it.next();
      it.remove();
      boolean endsHere = offset.equals(a.getEndNode().getOffset());
      boolean startsHere = offset.equals(a.getStartNode().getOffset());
      if (endsHere) {
        if (startsHere) {
          // The annotation starts and ends at this offset
          if ("true".equals(a.getFeatures().get("isEmptyAndSpan"))) {
            // Zero-length annotation flagged isEmptyAndSpan: write a start
            // tag now and remember it so its end tag gets written later
            tmpBuff.append(writeStartTag(a, includeFeatures));
            stack.push(a);
          } else {
            // Zero-length annotation written as an empty tag
            tmpBuff.append(writeEmptyTag(a));
            // The annotation is removed from dumped set
            aDumpAnnotSet.remove(a);
          }
        } else {
          // The annotation only ends here: close any pending tags first,
          // then write its end tag
          while (!stack.isEmpty()) {
            tmpBuff.append(writeEndTag(stack.pop()));
          }
          tmpBuff.append(writeEndTag(a));
        }
      } else if (startsHere) {
        // The annotation only starts here: close any pending tags first,
        // then write its start tag
        while (!stack.isEmpty()) {
          tmpBuff.append(writeEndTag(stack.pop()));
        }
        tmpBuff.append(writeStartTag(a, includeFeatures));
        // The annotation is removed from dumped set
        aDumpAnnotSet.remove(a);
      }
    }
    // Close whatever is still pending at this offset
    while (!stack.isEmpty()) {
      tmpBuff.append(writeEndTag(stack.pop()));
    }
    // Replace every special character located at or after this offset with
    // its entity. Nothing has been inserted below the previously handled
    // offset yet, so these indices still match the original content.
    while (!offsets2CharsMap.isEmpty() && offsets2CharsMap.lastKey().intValue() >= offset.intValue()) {
      Long offsChar = offsets2CharsMap.lastKey();
      docContStrBuff.replace(offsChar.intValue(), offsChar.intValue() + 1, DocumentXmlUtils.entitiesMap.get(offsets2CharsMap.get(offsChar)));
      // Discard the offsChar after it was used.
      offsets2CharsMap.remove(offsChar);
    }
    // Insert tmpBuff to the location where it belongs in docContStrBuff
    docContStrBuff.insert(offset.intValue(), tmpBuff.toString());
  }
  // Special characters located before the lowest annotation offset (or all
  // of them, if there were no annotations) still have to be replaced
  while (!offsets2CharsMap.isEmpty()) {
    Long offsChar = offsets2CharsMap.lastKey();
    // Replace the char with its entity
    docContStrBuff.replace(offsChar.intValue(), offsChar.intValue() + 1, DocumentXmlUtils.entitiesMap.get(offsets2CharsMap.get(offsChar)));
    // remove the offset from the map
    offsets2CharsMap.remove(offsChar);
  }
  return docContStrBuff.toString();
}
use of gate.Annotation in project gate-core by GateNLP.
the class DocumentImpl method identifyTheRootAnnotation.
// End identifyTheRootAnnotation()
/**
 * Looks for the annotation that can serve as the root element: one that
 * starts at offset 0 and ends at the largest end offset found in the list.
 * <p>
 * Only the first element is checked for a start offset of 0 before giving up,
 * so the list is presumably ordered by start offset (verify against callers).
 *
 * @param anAnnotationList
 *          the candidate annotations; may be {@code null} or empty
 * @return {@code null} if the list is {@code null} or empty, or if its first
 *         annotation starts after offset 0. For a single-element list, that
 *         annotation if its end offset equals {@code content.size()},
 *         otherwise {@code null}. For longer lists, the annotation with the
 *         smallest ID among those spanning from 0 to the largest end offset,
 *         or {@code null} if there is none.
 */
private Annotation identifyTheRootAnnotation(List<Annotation> anAnnotationList) {
  if (anAnnotationList == null || anAnnotationList.isEmpty()) {
    return null;
  }
  // No root tag unless the first annotation starts at the very beginning.
  Annotation first = anAnnotationList.get(0);
  if (first.getStartNode().getOffset().longValue() > 0) {
    return null;
  }
  // A lone annotation qualifies only if it also reaches the end of the content.
  if (anAnnotationList.size() == 1) {
    return first.getEndNode().getOffset().equals(content.size()) ? first : null;
  }
  // Largest end offset reached by any annotation in the list.
  long furthestEnd = 0;
  for (Annotation candidate : anAnnotationList) {
    furthestEnd = Math.max(furthestEnd, candidate.getEndNode().getOffset().longValue());
  }
  // Among the annotations covering [0, furthestEnd] keep the smallest ID;
  // on equal IDs the one seen first stays.
  Annotation root = null;
  for (Annotation candidate : anAnnotationList) {
    long candidateStart = candidate.getStartNode().getOffset().longValue();
    long candidateEnd = candidate.getEndNode().getOffset().longValue();
    if (candidateStart != 0 || candidateEnd != furthestEnd) {
      continue;
    }
    if (root == null || root.getId().intValue() > candidate.getId().intValue()) {
      root = candidate;
    }
  }
  return root;
}
use of gate.Annotation in project gate-core by GateNLP.
the class DocumentImpl method insertsSafety.
// End toXml()
/**
 * Verifies whether aSourceAnnotation can be inserted safely into
 * aTargetAnnotSet. "Safely" means that it does not cross over (partially
 * overlap without nesting) any annotation from aTargetAnnotSet.
 * <p>
 * When a crossing annotation is found it is stored in
 * {@code crossedOverAnnotation}. When an argument, a node or an offset of the
 * source annotation is {@code null}, {@code crossedOverAnnotation} is set to
 * {@code null}. On success the field is left as it was.
 *
 * @param aTargetAnnotSet
 *          the annotation set to include the aSourceAnnotation
 * @param aSourceAnnotation
 *          the annotation to be inserted into the aTargetAnnotSet
 * @return true if the annotation can be inserted safely, false otherwise.
 */
private boolean insertsSafety(AnnotationSet aTargetAnnotSet, Annotation aSourceAnnotation) {
  // Anything missing on the input side means there is nothing to compare.
  boolean incompleteInput = aTargetAnnotSet == null
      || aSourceAnnotation == null
      || aSourceAnnotation.getStartNode() == null
      || aSourceAnnotation.getStartNode().getOffset() == null
      || aSourceAnnotation.getEndNode() == null
      || aSourceAnnotation.getEndNode().getOffset() == null;
  if (incompleteInput) {
    this.crossedOverAnnotation = null;
    return false;
  }
  Long sourceStartOffset = aSourceAnnotation.getStartNode().getOffset();
  Long sourceEndOffset = aSourceAnnotation.getEndNode().getOffset();
  long sourceStart = sourceStartOffset.longValue();
  long sourceEnd = sourceEndOffset.longValue();
  // Only the annotations the target set returns for the source interval are
  // examined.
  AnnotationSet candidates = aTargetAnnotSet.get(sourceStartOffset, sourceEndOffset);
  for (Iterator<Annotation> candidateIter = candidates.iterator(); candidateIter.hasNext();) {
    Annotation other = candidateIter.next();
    long otherStart = other.getStartNode().getOffset().longValue();
    long otherEnd = other.getEndNode().getOffset().longValue();
    // other begins first and stops strictly inside the source
    boolean otherLeadsAndCrosses = otherStart < sourceStart && sourceStart < otherEnd && otherEnd < sourceEnd;
    // source begins first and stops strictly inside other
    boolean sourceLeadsAndCrosses = sourceStart < otherStart && otherStart < sourceEnd && sourceEnd < otherEnd;
    if (otherLeadsAndCrosses || sourceLeadsAndCrosses) {
      this.crossedOverAnnotation = other;
      return false;
    }
  }
  return true;
}
use of gate.Annotation in project gate-core by GateNLP.
the class DocumentXmlUtils method annotationSetToXml.
// annotationSetToXml
/**
 * Converts the Annotation set to XML which is appended to the supplied
 * StringBuffer instance. The standard
 * {@link #annotationSetToXml(AnnotationSet, StringBuffer) method} uses the
 * name that belongs to the provided annotation set, however, this method
 * allows one to store the provided annotation set under a different
 * annotation set name.
 * <p>
 * The set name and each annotation type are passed through
 * {@code replaceCharsWithEntities} before being written, so that characters
 * with a special meaning in XML do not break the attribute values.
 *
 * @param anAnnotationSet
 *          the annotation set that has to be saved as XML. If {@code null},
 *          an empty, unnamed {@code <AnnotationSet>} element is written.
 * @param annotationSetNameToUse
 *          the new name for the annotation set being converted to XML. If
 *          {@code null} or blank, no {@code Name} attribute is written.
 * @param buffer
 *          the StringBuffer that the XML representation should be appended to
 */
public static void annotationSetToXml(AnnotationSet anAnnotationSet, String annotationSetNameToUse, StringBuffer buffer) {
  if (anAnnotationSet == null) {
    buffer.append("<AnnotationSet>\n");
    buffer.append("</AnnotationSet>\n");
    return;
  }
  if (annotationSetNameToUse == null || annotationSetNameToUse.trim().length() == 0) {
    buffer.append("<AnnotationSet>\n");
  } else {
    buffer.append("<AnnotationSet Name=\"");
    // Escape the name: written raw, a quote, '&' or '<' would yield
    // malformed XML.
    // NOTE(review): relies on replaceCharsWithEntities covering the quote
    // character -- confirm against entitiesMap.
    buffer.append(replaceCharsWithEntities(annotationSetNameToUse));
    buffer.append("\" >\n");
  }
  // Handed to featuresToXml for every annotation of this set
  Map<String, StringBuffer> convertedKeys = new HashMap<String, StringBuffer>();
  // Iterate through AnnotationSet and save each Annotation as XML
  Iterator<Annotation> iterator = anAnnotationSet.iterator();
  while (iterator.hasNext()) {
    Annotation annot = iterator.next();
    buffer.append("<Annotation Id=\"");
    buffer.append(annot.getId());
    buffer.append("\" Type=\"");
    // String.valueOf keeps the previous "null" output for a missing type
    buffer.append(replaceCharsWithEntities(String.valueOf(annot.getType())));
    buffer.append("\" StartNode=\"");
    buffer.append(annot.getStartNode().getOffset());
    buffer.append("\" EndNode=\"");
    buffer.append(annot.getEndNode().getOffset());
    buffer.append("\">\n");
    buffer.append(featuresToXml(annot.getFeatures(), convertedKeys));
    buffer.append("</Annotation>\n");
  }
  buffer.append("</AnnotationSet>\n");
}
use of gate.Annotation in project gate-core by GateNLP.
the class DocumentXmlUtils method textWithNodes.
// replaceCharsWithEntities()
/**
 * Returns the given text interspersed with {@code <Node id="..."/>} elements
 * at every offset where an annotation of the document (default set or any
 * named set) begins or ends, with special characters replaced by their
 * entities from {@code entitiesMap}.
 *
 * @param doc
 *          the document whose annotation sets supply the node offsets
 * @param aText
 *          the text to decorate
 * @return an empty string if {@code aText} is {@code null}; the result of
 *         {@code replaceCharsWithEntities(aText)} if the document has no
 *         annotations at all; otherwise the text with nodes and entities
 */
public static String textWithNodes(TextualDocument doc, String aText) {
  if (aText == null) {
    return "";
  }
  StringBuffer filteredText = filterNonXmlChars(new StringBuffer(aText));
  // Positions of the characters that must be written as entities
  SortedMap<Long, Character> entityChars = new TreeMap<Long, Character>();
  if (aText.length() != 0) {
    buildEntityMapFromString(aText, entityChars);
  }
  // Every offset where some annotation starts or ends: default set first...
  SortedSet<Long> nodeOffsets = new TreeSet<Long>();
  for (Iterator<Annotation> defaultIter = doc.getAnnotations().iterator(); defaultIter.hasNext();) {
    Annotation annotation = defaultIter.next();
    nodeOffsets.add(annotation.getStartNode().getOffset());
    nodeOffsets.add(annotation.getEndNode().getOffset());
  }
  // ...then all the named sets, if the document has any
  Map<String, AnnotationSet> namedSets = doc.getNamedAnnotationSets();
  if (namedSets != null) {
    for (AnnotationSet namedSet : namedSets.values()) {
      for (Iterator<Annotation> namedIter = namedSet.iterator(); namedIter.hasNext();) {
        Annotation annotation = namedIter.next();
        nodeOffsets.add(annotation.getStartNode().getOffset());
        nodeOffsets.add(annotation.getEndNode().getOffset());
      }
    }
  }
  // Without any node there is only the entity replacement left to do
  if (nodeOffsets.isEmpty()) {
    return replaceCharsWithEntities(aText).toString();
  }
  StringBuffer result = new StringBuffer(filteredText.length() * 2);
  // Index of the next character of filteredText that has not been copied yet
  int copiedUpTo = 0;
  // Walk node offsets and entity offsets together, in ascending order
  Set<Long> stops = new TreeSet<Long>(nodeOffsets);
  stops.addAll(entityChars.keySet());
  for (Long stop : stops) {
    int stopIndex = stop.intValue();
    // Copy the plain text between the previous stop and this one
    if (stopIndex > copiedUpTo) {
      result.append(filteredText.substring(copiedUpTo, stopIndex));
      copiedUpTo = stopIndex;
    }
    // A node goes in front of whatever character sits at this offset
    if (nodeOffsets.contains(stop)) {
      result.append("<Node id=\"").append(stopIndex).append("\"/>");
    }
    // The character here is special: skip it in the source and write its
    // entity instead
    if (entityChars.containsKey(stop)) {
      String entity = entitiesMap.get(entityChars.get(stop));
      copiedUpTo++;
      result.append(entity);
    }
  }
  // Whatever follows the last stop
  result.append(filteredText.substring(copiedUpTo));
  return result.toString();
}
Aggregations