// insertsSafety()
 * This method saves all the annotations from aDumpAnnotSet and combines them
 * with the document content.
 * @param aDumpAnnotSet
 *          is a GATE annotation set prepared to be used on the raw text from
 *          document content. If aDumpAnnotSet is <b>null</b> then the
 *          document content is returned without any annotation tags.
 * @param includeFeatures
 *          is a boolean, which controls whether the annotation features and
 *          gate ID are included or not.
 * @return The XML document obtained from raw text + the information from the
 *         dump annotation set.
private String saveAnnotationSetAsXml(AnnotationSet aDumpAnnotSet, boolean includeFeatures) {
    // Builds the XML text by inserting the tags of the annotations from
    // aDumpAnnotSet into the document content and by replacing special
    // characters with their entities.
    // NOTE(review): this listing appears to have lost several statements and
    // closing braces (e.g. `Annotation annot =;`); the comments below describe
    // only what is visible here - confirm against the complete source.
    String content = null;
    if (this.getContent() == null)
        content = "";
        // NOTE(review): no `else` is visible, so as written the next line runs
        // unconditionally and would dereference a null content - confirm.
        content = this.getContent().toString();
    // Work on a buffer holding the content after filterNonXmlChars.
    StringBuffer docContStrBuff = DocumentXmlUtils.filterNonXmlChars(new StringBuffer(content));
    // No annotation set: return the buffer as it is, with no tags inserted.
    if (aDumpAnnotSet == null)
        return docContStrBuff.toString();
    // Maps an offset to the special character found at that offset.
    TreeMap<Long, Character> offsets2CharsMap = new TreeMap<Long, Character>();
    if (this.getContent().size().longValue() != 0) {
        // Fill the offsets2CharsMap with all the indices where
        // special chars appear
        buildEntityMapFromString(content, offsets2CharsMap);
    // End if
    // The saving algorithm is as follows:
    // /////////////////////////////////////////
    // Construct a set of annot with all IDs in asc order.
    // All annotations that end at that offset swap their place in descending
    // order. For each node write all the tags from left to right.
    // Construct the node set
    TreeSet<Long> offsets = new TreeSet<Long>();
    Iterator<Annotation> iter = aDumpAnnotSet.iterator();
    while (iter.hasNext()) {
        // NOTE(review): the right-hand side (presumably iter.next()) and the
        // statements that add the annotation's offsets to the set are missing.
        Annotation annot =;
    // End of the annotation iteration.
    // The offsets are visited from the largest to the smallest one
    // (offsets.last() is taken on every pass).
    while (!offsets.isEmpty()) {
        Long offset = offsets.last();
        // Remove the offset from the set
        // NOTE(review): the removal statement itself is not visible here.
        // Now, use it.
        // Returns a list with annotations that needs to be serialized in that
        // offset.
        List<Annotation> annotations = getAnnotationsForOffset(aDumpAnnotSet, offset);
        // Attention: the annotation are serialized from left to right
        // tmpBuff collects the tags written for this offset; its initial
        // capacity is the content size times DOC_SIZE_MULTIPLICATION_FACTOR_AS.
        StringBuffer tmpBuff = new StringBuffer(DOC_SIZE_MULTIPLICATION_FACTOR_AS * (this.getContent().size().intValue()));
        Stack<Annotation> stack = new Stack<Annotation>();
        // Iterate through all these annotations and serialize them
        Iterator<Annotation> it = annotations.iterator();
        while (it.hasNext()) {
            // NOTE(review): right-hand side missing (presumably it.next()).
            Annotation a =;
            // Test if a Ends at offset
            if (offset.equals(a.getEndNode().getOffset())) {
                // Test if a Starts at offset
                if (offset.equals(a.getStartNode().getOffset())) {
                    // Here, the annotation a Starts and Ends at the offset
                    // The "isEmptyAndSpan" feature must be present and equal
                    // to "true" for a start tag to be written here.
                    if (null != a.getFeatures().get("isEmptyAndSpan") && "true".equals(a.getFeatures().get("isEmptyAndSpan"))) {
                        // Assert: annotation a with start == end and isEmptyAndSpan
                        tmpBuff.append(writeStartTag(a, includeFeatures));
                    } else {
                        // Assert annotation a with start == end and an empty tag
                        // The annotation is removed from dumped set
                // End if
                } else {
                    // In this case empty the stack and write the end tag
                    if (!stack.isEmpty()) {
                        while (!stack.isEmpty()) {
                            Annotation a1 = stack.pop();
                    // End while
                    // End if
            // End if
            } else {
                // The annotation a does not end at the offset; check whether
                // it starts at the offset
                if (offset.equals(a.getStartNode().getOffset())) {
                    // In this case empty the stack and write the end tag
                    if (!stack.isEmpty()) {
                        while (!stack.isEmpty()) {
                            Annotation a1 = stack.pop();
                    // End while
                    // End if
                    tmpBuff.append(writeStartTag(a, includeFeatures));
                    // The annotation is removed from dumped set
            // End if ( offset.equals(a.getStartNode().getOffset()) )
        // End if ( offset.equals(a.getEndNode().getOffset()) )
        // In this case empty the stack and write the end tag
        if (!stack.isEmpty()) {
            while (!stack.isEmpty()) {
                Annotation a1 = stack.pop();
        // End while
        // Before inserting tmpBuff, the special chars located at or after the
        // current offset are replaced with their entities.
        if (!offsets2CharsMap.isEmpty()) {
            Long offsChar = offsets2CharsMap.lastKey();
            while (!offsets2CharsMap.isEmpty() && offsChar.intValue() >= offset.intValue()) {
                // Replace the char at offsChar with its corresponding entity form
                // the entitiesMap.
                docContStrBuff.replace(offsChar.intValue(), offsChar.intValue() + 1, DocumentXmlUtils.entitiesMap.get(offsets2CharsMap.get(offsChar)));
                // Discard the offsChar after it was used.
                // NOTE(review): the statement discarding offsChar is not visible.
                // Investigate next offsChar
                if (!offsets2CharsMap.isEmpty())
                    offsChar = offsets2CharsMap.lastKey();
        // End while
        // End if
        // Insert tmpBuff to the location where it belongs in docContStrBuff
        docContStrBuff.insert(offset.intValue(), tmpBuff.toString());
    // Any special chars still left in the map are replaced here.
    while (!offsets2CharsMap.isEmpty()) {
        Long offsChar = offsets2CharsMap.lastKey();
        // Replace the char with its entity
        docContStrBuff.replace(offsChar.intValue(), offsChar.intValue() + 1, DocumentXmlUtils.entitiesMap.get(offsets2CharsMap.get(offsChar)));
        // remove the offset from the map
        // NOTE(review): the removal statement itself is not visible here.
    // End while
    return docContStrBuff.toString();
Also used : TreeMap(java.util.TreeMap) Annotation(gate.Annotation) Stack(java.util.Stack) TreeSet(java.util.TreeSet)

Example 32 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class DocumentImpl method identifyTheRootAnnotation.

// End identifyTheRootAnnotation()
private Annotation identifyTheRootAnnotation(List<Annotation> anAnnotationList) {
    // Picks the annotation that starts at offset 0 and ends at the largest end
    // offset found in the list; among several such annotations the one with
    // the smallest ID wins. Returns null when no such annotation exists.
    // NOTE(review): closing braces appear to be missing from this listing.
    if (anAnnotationList == null || anAnnotationList.isEmpty())
        return null;
    // If the first annotation in the list does not have a start
    // offset = 0, then there's no root tag.
    if (anAnnotationList.get(0).getStartNode().getOffset().longValue() > 0)
        return null;
    // If there is a single annotation and it starts at 0 (checked above),
    // it is the root only if it ends at content.size().
    if (anAnnotationList.size() == 1) {
        Annotation onlyAnn = anAnnotationList.get(0);
        if (onlyAnn.getEndNode().getOffset().equals(content.size()))
            return onlyAnn;
        return null;
    // find the limits
    // we know this already
    long start = 0;
    // end = 0 will be improved by the next loop
    long end = 0;
    // Compute the largest end offset over all annotations in the list.
    for (int i = 0; i < anAnnotationList.size(); i++) {
        Annotation anAnnotation = anAnnotationList.get(i);
        long localEnd = anAnnotation.getEndNode().getOffset().longValue();
        if (localEnd > end)
            end = localEnd;
    // Go and find the annotation.
    // look at all annotations that start at 0 and end at end
    // if there are several, choose the one with the smallest ID
    Annotation theRootAnnotation = null;
    for (int i = 0; i < anAnnotationList.size(); i++) {
        Annotation currentAnnot = anAnnotationList.get(i);
        long localStart = currentAnnot.getStartNode().getOffset().longValue();
        long localEnd = currentAnnot.getEndNode().getOffset().longValue();
        // If the current annotation starts at 0 and ends at the largest end
        // offset, then check to see if its ID is the smallest.
        if ((start == localStart) && (end == localEnd)) {
            // The current annotation is a potential root one.
            if (theRootAnnotation == null)
                theRootAnnotation = currentAnnot;
            else {
                // If root's ID is greater than the currentAnnot's then update the root
                if (theRootAnnotation.getId().intValue() > currentAnnot.getId().intValue())
                    theRootAnnotation = currentAnnot;
        // End if
    // End if
    // End for
    return theRootAnnotation;
Also used : Annotation(gate.Annotation)

Example 33 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class DocumentImpl method insertsSafety.

// End toXml()
 * This method verifies whether aSourceAnnotation can be inserted safely into
 * aTargetAnnotSet. Safely means that it doesn't violate the crossed-over
 * condition with any annotation from aTargetAnnotSet.
 * @param aTargetAnnotSet
 *          the annotation set to include the aSourceAnnotation
 * @param aSourceAnnotation
 *          the annotation to be inserted into the aTargetAnnotSet
 * @return true if the annotation can be inserted safely, or false otherwise.
private boolean insertsSafety(AnnotationSet aTargetAnnotSet, Annotation aSourceAnnotation) {
    // Returns true when no annotation obtained from aTargetAnnotSet crosses
    // over aSourceAnnotation. On a crossing, the conflicting annotation is
    // stored in this.crossedOverAnnotation and false is returned.
    // NOTE(review): closing braces appear to be missing from this listing.
    // Invalid input (null set, null annotation, missing node or offset):
    // clear crossedOverAnnotation and report failure.
    if (aTargetAnnotSet == null || aSourceAnnotation == null) {
        this.crossedOverAnnotation = null;
        return false;
    if (aSourceAnnotation.getStartNode() == null || aSourceAnnotation.getStartNode().getOffset() == null) {
        this.crossedOverAnnotation = null;
        return false;
    if (aSourceAnnotation.getEndNode() == null || aSourceAnnotation.getEndNode().getOffset() == null) {
        this.crossedOverAnnotation = null;
        return false;
    // Get the start and end offsets
    Long start = aSourceAnnotation.getStartNode().getOffset();
    Long end = aSourceAnnotation.getEndNode().getOffset();
    // Read aSourceAnnotation offsets long
    long s2 = start.longValue();
    long e2 = end.longValue();
    // Obtain a set with all annotations that overlap
    // totally or partially with the interval defined by the two provided offsets
    AnnotationSet as = aTargetAnnotSet.get(start, end);
    // Investigate all the annotations from as to see if there is one that
    // comes in conflict with aSourceAnnotation
    Iterator<Annotation> it = as.iterator();
    while (it.hasNext()) {
        // NOTE(review): right-hand side missing (presumably it.next()).
        Annotation ann =;
        // Read ann offsets
        long s1 = ann.getStartNode().getOffset().longValue();
        long e1 = ann.getEndNode().getOffset().longValue();
        // ann starts before the source annotation and ends strictly inside it.
        if (s1 < s2 && s2 < e1 && e1 < e2) {
            this.crossedOverAnnotation = ann;
            return false;
        // ann starts strictly inside the source annotation and ends after it.
        if (s2 < s1 && s1 < e2 && e2 < e1) {
            this.crossedOverAnnotation = ann;
            return false;
    // End while
    return true;
Also used : AnnotationSet(gate.AnnotationSet) Annotation(gate.Annotation)

Example 34 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class DocumentXmlUtils method annotationSetToXml.

// annotationSetToXml
 * Converts the Annotation set to XML which is appended to the supplied
 * StringBuffer instance. The standard
 * {@link #annotationSetToXml(AnnotationSet, StringBuffer) method} uses the
 * name that belongs to the provided annotation set, however, this method
 * allows one to store the provided annotation set under a different
 * annotation set name.
 * @param anAnnotationSet
 *          the annotation set that has to be saved as XML.
 * @param annotationSetNameToUse
 *          the new name for the annotation set being converted to XML
 * @param buffer
 *          the StringBuffer that the XML representation should be appended to
public static void annotationSetToXml(AnnotationSet anAnnotationSet, String annotationSetNameToUse, StringBuffer buffer) {
    // Appends an XML representation of anAnnotationSet to buffer, one
    // <Annotation> element per annotation.
    // NOTE(review): this listing appears to have lost several statements and
    // closing braces (the bodies of the first two `if`s, the appended
    // attribute values, the iterator's next() call); the comments describe
    // only what is visible here - confirm against the complete source.
    if (anAnnotationSet == null) {
    // End if
    // A null or blank name takes the first branch (its statement is not
    // visible); otherwise the element is opened with a Name attribute.
    if (annotationSetNameToUse == null || annotationSetNameToUse.trim().length() == 0)
    else {
        buffer.append("<AnnotationSet Name=\"");
        buffer.append("\" >\n");
    // A single map instance is passed to featuresToXml for every annotation.
    Map<String, StringBuffer> convertedKeys = new HashMap<String, StringBuffer>();
    // Iterate through AnnotationSet and save each Annotation as XML
    Iterator<Annotation> iterator = anAnnotationSet.iterator();
    while (iterator.hasNext()) {
        Annotation annot =;
        buffer.append("<Annotation Id=\"");
        buffer.append("\" Type=\"");
        buffer.append("\" StartNode=\"");
        buffer.append("\" EndNode=\"");
        buffer.append(featuresToXml(annot.getFeatures(), convertedKeys));
    // End while
Also used : HashMap(java.util.HashMap) Annotation(gate.Annotation)

Example 35 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class DocumentXmlUtils method textWithNodes.

// replaceCharsWithEntities()
 * Returns the document's text interspersed with &lt;Node&gt; elements at all
 * points where the document has an annotation beginning or ending.
public static String textWithNodes(TextualDocument doc, String aText) {
    // Returns aText with a <Node id="offset"/> element written at every
    // collected annotation offset and with special characters converted to
    // entities. A null aText yields an empty string.
    // NOTE(review): this listing appears to have lost several statements and
    // closing braces (e.g. `Annotation annot =;`); the comments describe only
    // what is visible here - confirm against the complete source.
    // get offsets for XML entities
    if (aText == null)
        return new String("");
    StringBuffer textWithNodes = filterNonXmlChars(new StringBuffer(aText));
    // Construct a map from offsets to Chars ()
    SortedMap<Long, Character> offsets2CharsMap = new TreeMap<Long, Character>();
    if (aText.length() != 0) {
        // Fill the offsets2CharsMap with all the indices where special chars
        // appear
        buildEntityMapFromString(aText, offsets2CharsMap);
    // End if
    // Construct the offsetsSet for all nodes belonging to this document
    SortedSet<Long> offsetsSet = new TreeSet<Long>();
    Iterator<Annotation> annotSetIter = doc.getAnnotations().iterator();
    while (annotSetIter.hasNext()) {
        // NOTE(review): the right-hand side and the statements adding the
        // annotation's offsets to offsetsSet are missing.
        Annotation annot =;
    // end While
    // Get the nodes from all other named annotation sets.
    Map<String, AnnotationSet> namedAnnotSets = doc.getNamedAnnotationSets();
    if (namedAnnotSets != null) {
        Iterator<AnnotationSet> iter = namedAnnotSets.values().iterator();
        while (iter.hasNext()) {
            AnnotationSet annotSet =;
            Iterator<Annotation> iter2 = annotSet.iterator();
            while (iter2.hasNext()) {
                Annotation annotTmp =;
        // End while
    // End while
    // No node offsets were collected: return the result of
    // replaceCharsWithEntities applied to the raw text.
    if (offsetsSet.isEmpty()) {
        return replaceCharsWithEntities(aText).toString();
    // create a large StringBuffer (twice the length of the filtered text)
    StringBuffer modifiedBuffer = new StringBuffer(textWithNodes.length() * 2);
    // last character copied from the original String
    int lastCharactercopied = 0;
    // append to buffer all text up to next offset
    // for node or entity
    // we need to iterate on offsetSet and offsets2CharsMap
    // NOTE(review): nothing visible here populates allOffsets (presumably the
    // union of offsetsSet and the keys of offsets2CharsMap) - confirm.
    Set<Long> allOffsets = new TreeSet<Long>();
    Iterator<Long> allOffsetsIterator = allOffsets.iterator();
    while (allOffsetsIterator.hasNext()) {
        Long nextOffset =;
        int nextOffsetint = nextOffset.intValue();
        // is there some text to add since last time?
        if (nextOffsetint > lastCharactercopied) {
            modifiedBuffer.append(textWithNodes.substring(lastCharactercopied, nextOffsetint));
            lastCharactercopied = nextOffsetint;
        // do we need to add a node information here?
        if (offsetsSet.contains(nextOffset))
            modifiedBuffer.append("<Node id=\"").append(nextOffsetint).append("\"/>");
        // do we need to convert an XML entity?
        if (offsets2CharsMap.containsKey(nextOffset)) {
            String entityString = entitiesMap.get(offsets2CharsMap.get(nextOffset));
            // skip the character in the original String
            // append the corresponding entity
            // NOTE(review): the statements for these two steps are not visible.
    // copies the remaining text
    modifiedBuffer.append(textWithNodes.substring(lastCharactercopied, textWithNodes.length()));
    return modifiedBuffer.toString();
Also used : AnnotationSet(gate.AnnotationSet) TreeMap(java.util.TreeMap) Annotation(gate.Annotation) TreeSet(java.util.TreeSet)


