Search in sources :

Example 31 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class DocumentImpl method saveAnnotationSetAsXml.

// insertsSafety()
/**
 * Serializes the annotations of aDumpAnnotSet as inline XML tags woven into
 * the document content.
 * <p>
 * Annotation offsets are processed from the highest to the lowest, so text
 * inserted at one offset never shifts the positions that still have to be
 * processed. Characters recorded in the offset-to-character map are replaced
 * by their entity from DocumentXmlUtils.entitiesMap as the sweep passes them.
 * <p>
 * Side effect: some annotations are removed from aDumpAnnotSet while it is
 * being serialized, and the lists returned by getAnnotationsForOffset are
 * drained.
 *
 * @param aDumpAnnotSet
 *          is a GATE annotation set prepared to be used on the raw text from
 *          document content. If aDumpAnnotSet is <b>null</b> the filtered
 *          document content is returned without any tags.
 * @param includeFeatures
 *          is a boolean, which controls whether the annotation features and
 *          gate ID are included or not (it is forwarded to writeStartTag).
 * @return The XML document obtained from raw text + the information from the
 *         dump annotation set.
 */
@SuppressWarnings("unused")
private String saveAnnotationSetAsXml(AnnotationSet aDumpAnnotSet, boolean includeFeatures) {
    // A document without content is treated as empty text everywhere below;
    // previously only the first access was guarded and later ones threw NPE.
    final boolean hasContent = this.getContent() != null;
    final String content = hasContent ? this.getContent().toString() : "";
    StringBuffer docContStrBuff = DocumentXmlUtils.filterNonXmlChars(new StringBuffer(content));
    if (aDumpAnnotSet == null)
        return docContStrBuff.toString();
    final long contentSize = hasContent ? this.getContent().size().longValue() : 0L;
    TreeMap<Long, Character> offsets2CharsMap = new TreeMap<Long, Character>();
    if (contentSize != 0) {
        // Fill the offsets2CharsMap with all the indices where
        // special chars appear
        buildEntityMapFromString(content, offsets2CharsMap);
    }
    // The saving algorithm is as follows:
    // 1. Collect every distinct start/end offset used by the annotations.
    // 2. Visit the offsets in descending order; for each one build the tags
    //    that belong there (written left to right) and insert them into the
    //    content in one go.
    TreeSet<Long> offsets = new TreeSet<Long>();
    for (Annotation annot : aDumpAnnotSet) {
        offsets.add(annot.getStartNode().getOffset());
        offsets.add(annot.getEndNode().getOffset());
    }
    // One scratch buffer, reset for every offset, instead of a fresh
    // document-sized allocation per offset.
    StringBuffer tmpBuff = new StringBuffer(DOC_SIZE_MULTIPLICATION_FACTOR_AS * ((int) contentSize));
    while (!offsets.isEmpty()) {
        Long offset = offsets.last();
        // Remove the offset from the set
        offsets.remove(offset);
        // Annotations that need to be serialized at this offset.
        List<Annotation> annotations = getAnnotationsForOffset(aDumpAnnotSet, offset);
        tmpBuff.setLength(0);
        // Zero-length "isEmptyAndSpan" annotations opened at this offset and
        // still waiting for their end tag.
        Stack<Annotation> stack = new Stack<Annotation>();
        // Attention: the annotations are serialized from left to right
        Iterator<Annotation> it = annotations.iterator();
        while (it.hasNext()) {
            Annotation a = it.next();
            it.remove();
            boolean endsHere = offset.equals(a.getEndNode().getOffset());
            boolean startsHere = offset.equals(a.getStartNode().getOffset());
            if (endsHere && startsHere) {
                // Here, the annotation a starts and ends at the offset
                if ("true".equals(a.getFeatures().get("isEmptyAndSpan"))) {
                    // Written as a start tag now; its end tag follows when the
                    // stack is flushed.
                    tmpBuff.append(writeStartTag(a, includeFeatures));
                    stack.push(a);
                } else {
                    // Zero-length annotation written as an empty tag
                    tmpBuff.append(writeEmptyTag(a));
                    // The annotation is removed from dumped set
                    aDumpAnnotSet.remove(a);
                }
            } else if (endsHere) {
                // Close any pending zero-length spans, then close a itself
                while (!stack.isEmpty()) {
                    tmpBuff.append(writeEndTag(stack.pop()));
                }
                tmpBuff.append(writeEndTag(a));
            } else if (startsHere) {
                // Close any pending zero-length spans, then open a
                while (!stack.isEmpty()) {
                    tmpBuff.append(writeEndTag(stack.pop()));
                }
                tmpBuff.append(writeStartTag(a, includeFeatures));
                // The annotation is removed from dumped set
                aDumpAnnotSet.remove(a);
            }
            // Annotations that neither start nor end here produce no output.
        }
        // Close whatever zero-length spans are still open at this offset
        while (!stack.isEmpty()) {
            tmpBuff.append(writeEndTag(stack.pop()));
        }
        // Before inserting the tags, replace with its entity every special
        // character located at or after this offset; positions before the
        // offset are unaffected by these replacements.
        if (!offsets2CharsMap.isEmpty()) {
            Long offsChar = offsets2CharsMap.lastKey();
            while (!offsets2CharsMap.isEmpty() && offsChar.intValue() >= offset.intValue()) {
                // Replace the char at offsChar with its corresponding entity
                // from the entitiesMap.
                docContStrBuff.replace(offsChar.intValue(), offsChar.intValue() + 1, DocumentXmlUtils.entitiesMap.get(offsets2CharsMap.get(offsChar)));
                // Discard the offsChar after it was used.
                offsets2CharsMap.remove(offsChar);
                // Investigate next offsChar
                if (!offsets2CharsMap.isEmpty())
                    offsChar = offsets2CharsMap.lastKey();
            }
        }
        // Insert tmpBuff to the location where it belongs in docContStrBuff
        docContStrBuff.insert(offset.intValue(), tmpBuff.toString());
    }
    // Special characters located before the lowest annotation offset (or all
    // of them, when there were no annotations) still have to be replaced.
    while (!offsets2CharsMap.isEmpty()) {
        Long offsChar = offsets2CharsMap.lastKey();
        // Replace the char with its entity
        docContStrBuff.replace(offsChar.intValue(), offsChar.intValue() + 1, DocumentXmlUtils.entitiesMap.get(offsets2CharsMap.get(offsChar)));
        // remove the offset from the map
        offsets2CharsMap.remove(offsChar);
    }
    return docContStrBuff.toString();
}
Also used : TreeMap(java.util.TreeMap) Annotation(gate.Annotation) Stack(java.util.Stack) TreeSet(java.util.TreeSet)

Example 32 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class DocumentImpl method identifyTheRootAnnotation.

// End identifyTheRootAnnotation()
/**
 * Picks the annotation that can act as the root element: one that starts at
 * offset 0 and ends at the largest end offset found in the list.
 *
 * @param anAnnotationList
 *          the candidate annotations; only the first element's start offset
 *          is checked against 0, so the list is presumably ordered by start
 *          offset - verify against the caller.
 * @return the root annotation, or null when the list is null or empty, when
 *         its first annotation does not start at 0, or when no annotation
 *         covers the whole range. Among several covering annotations the
 *         one with the smallest ID wins.
 */
private Annotation identifyTheRootAnnotation(List<Annotation> anAnnotationList) {
    if (anAnnotationList == null || anAnnotationList.isEmpty()) {
        return null;
    }
    // If the first annotation starts after offset 0 there is no root tag.
    final Annotation firstAnnotation = anAnnotationList.get(0);
    if (firstAnnotation.getStartNode().getOffset().longValue() > 0) {
        return null;
    }
    // A lone annotation (known to start at 0) is the root only if it also
    // ends where the document content ends.
    if (anAnnotationList.size() == 1) {
        return firstAnnotation.getEndNode().getOffset().equals(content.size()) ? firstAnnotation : null;
    }
    // Largest end offset reached by any annotation in the list.
    long maxEnd = 0;
    for (Annotation candidate : anAnnotationList) {
        maxEnd = Math.max(maxEnd, candidate.getEndNode().getOffset().longValue());
    }
    // Among the annotations spanning [0, maxEnd] keep the smallest ID; on
    // equal IDs the earlier list entry is kept.
    Annotation root = null;
    for (Annotation candidate : anAnnotationList) {
        final long candidateStart = candidate.getStartNode().getOffset().longValue();
        final long candidateEnd = candidate.getEndNode().getOffset().longValue();
        if (candidateStart != 0 || candidateEnd != maxEnd) {
            continue;
        }
        if (root == null || root.getId().intValue() > candidate.getId().intValue()) {
            root = candidate;
        }
    }
    return root;
}
Also used : Annotation(gate.Annotation)

Example 33 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class DocumentImpl method insertsSafety.

// End toXml()
/**
 * Checks whether aSourceAnnotation can be inserted safely into
 * aTargetAnnotSet. "Safely" means that it does not cross over (partially
 * overlap, without nesting) any annotation already in aTargetAnnotSet.
 * <p>
 * When a crossing annotation is found it is stored in
 * crossedOverAnnotation; when an argument, node or offset is missing that
 * field is set to null. On success the field is left untouched.
 *
 * @param aTargetAnnotSet
 *          the annotation set that should receive aSourceAnnotation
 * @param aSourceAnnotation
 *          the annotation to be inserted into aTargetAnnotSet
 * @return true if the annotation can be inserted safely, false otherwise
 *         (including when any input, node or offset is null).
 */
private boolean insertsSafety(AnnotationSet aTargetAnnotSet, Annotation aSourceAnnotation) {
    // Short-circuit evaluation keeps the null checks in dependency order.
    final boolean incompleteInput = aTargetAnnotSet == null
            || aSourceAnnotation == null
            || aSourceAnnotation.getStartNode() == null
            || aSourceAnnotation.getStartNode().getOffset() == null
            || aSourceAnnotation.getEndNode() == null
            || aSourceAnnotation.getEndNode().getOffset() == null;
    if (incompleteInput) {
        this.crossedOverAnnotation = null;
        return false;
    }
    final Long sourceStartOffset = aSourceAnnotation.getStartNode().getOffset();
    final Long sourceEndOffset = aSourceAnnotation.getEndNode().getOffset();
    final long sourceStart = sourceStartOffset.longValue();
    final long sourceEnd = sourceEndOffset.longValue();
    // Only the annotations the target set reports for the source interval
    // are examined for a conflict.
    AnnotationSet candidates = aTargetAnnotSet.get(sourceStartOffset, sourceEndOffset);
    for (Annotation candidate : candidates) {
        final long candidateStart = candidate.getStartNode().getOffset().longValue();
        final long candidateEnd = candidate.getEndNode().getOffset().longValue();
        // candidate begins before the source and ends strictly inside it
        final boolean crossesSourceStart =
                candidateStart < sourceStart && sourceStart < candidateEnd && candidateEnd < sourceEnd;
        // candidate begins strictly inside the source and ends after it
        final boolean crossesSourceEnd =
                sourceStart < candidateStart && candidateStart < sourceEnd && sourceEnd < candidateEnd;
        if (crossesSourceStart || crossesSourceEnd) {
            this.crossedOverAnnotation = candidate;
            return false;
        }
    }
    return true;
}
Also used : AnnotationSet(gate.AnnotationSet) Annotation(gate.Annotation)

Example 34 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class DocumentXmlUtils method annotationSetToXml.

// annotationSetToXml
/**
 * Converts the Annotation set to XML which is appended to the supplied
 * StringBuffer instance. The standard
 * {@link #annotationSetToXml(AnnotationSet, StringBuffer) method} uses the
 * name that belongs to the provided annotation set, however, this method
 * allows one to store the provided annotation set under a different
 * annotation set name.
 * <p>
 * The set name and each annotation type are written as XML attribute
 * values, so the markup characters in them are escaped.
 *
 * @param anAnnotationSet
 *          the annotation set that has to be saved as XML. If it is null an
 *          empty, unnamed AnnotationSet element is written.
 * @param annotationSetNameToUse
 *          the new name for the annotation set being converted to XML; a
 *          null or blank name produces an unnamed AnnotationSet element
 * @param buffer
 *          the StringBuffer that the XML representation should be appended to
 */
public static void annotationSetToXml(AnnotationSet anAnnotationSet, String annotationSetNameToUse, StringBuffer buffer) {
    if (anAnnotationSet == null) {
        buffer.append("<AnnotationSet>\n");
        buffer.append("</AnnotationSet>\n");
        return;
    }
    if (annotationSetNameToUse == null || annotationSetNameToUse.trim().length() == 0)
        buffer.append("<AnnotationSet>\n");
    else {
        buffer.append("<AnnotationSet Name=\"");
        // A raw quote, ampersand or angle bracket here would break the
        // attribute and make the whole document unparseable.
        appendEscapedAttributeValue(buffer, annotationSetNameToUse);
        buffer.append("\" >\n");
    }
    // Handed to every featuresToXml call so the same map instance is shared
    // across all annotations of this set.
    Map<String, StringBuffer> convertedKeys = new HashMap<String, StringBuffer>();
    // Iterate through AnnotationSet and save each Annotation as XML
    for (Annotation annot : anAnnotationSet) {
        buffer.append("<Annotation Id=\"");
        buffer.append(annot.getId());
        buffer.append("\" Type=\"");
        appendEscapedAttributeValue(buffer, annot.getType());
        buffer.append("\" StartNode=\"");
        buffer.append(annot.getStartNode().getOffset());
        buffer.append("\" EndNode=\"");
        buffer.append(annot.getEndNode().getOffset());
        buffer.append("\">\n");
        buffer.append(featuresToXml(annot.getFeatures(), convertedKeys));
        buffer.append("</Annotation>\n");
    }
    buffer.append("</AnnotationSet>\n");
}

/**
 * Appends value to buffer with the five XML markup characters replaced by
 * their predefined entities, so the result is safe inside a quoted
 * attribute. A null value is appended as the text "null", matching what a
 * plain StringBuffer.append of a null String produces.
 */
private static void appendEscapedAttributeValue(StringBuffer buffer, String value) {
    if (value == null) {
        buffer.append("null");
        return;
    }
    for (int i = 0; i < value.length(); i++) {
        char c = value.charAt(i);
        switch (c) {
            case '&':
                buffer.append("&amp;");
                break;
            case '<':
                buffer.append("&lt;");
                break;
            case '>':
                buffer.append("&gt;");
                break;
            case '"':
                buffer.append("&quot;");
                break;
            case '\'':
                buffer.append("&apos;");
                break;
            default:
                buffer.append(c);
        }
    }
}
Also used : HashMap(java.util.HashMap) Annotation(gate.Annotation)

Example 35 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class DocumentXmlUtils method textWithNodes.

// replaceCharsWithEntities()
/**
 * Returns the document's text interspersed with &lt;Node&gt; elements at all
 * points where the document has an annotation beginning or ending.
 * <p>
 * Offsets are gathered from the document's default annotation set and from
 * all of its named annotation sets. Characters recorded by
 * buildEntityMapFromString are replaced by their entity from entitiesMap.
 *
 * @param doc
 *          the document whose annotation offsets are turned into Node
 *          elements; it is dereferenced without a null check
 * @param aText
 *          the text to decorate; annotation offsets are used as indices
 *          into it
 * @return the decorated text, or an empty string when aText is null
 */
public static String textWithNodes(TextualDocument doc, String aText) {
    if (aText == null)
        return "";
    StringBuffer textWithNodes = filterNonXmlChars(new StringBuffer(aText));
    // Map from offset to the special character found there
    SortedMap<Long, Character> offsets2CharsMap = new TreeMap<Long, Character>();
    if (aText.length() != 0) {
        // Fill the offsets2CharsMap with all the indices where special chars
        // appear
        buildEntityMapFromString(aText, offsets2CharsMap);
    }
    // Collect the offsets of all nodes belonging to this document, first
    // from the default annotation set ...
    SortedSet<Long> offsetsSet = new TreeSet<Long>();
    for (Annotation annot : doc.getAnnotations()) {
        offsetsSet.add(annot.getStartNode().getOffset());
        offsetsSet.add(annot.getEndNode().getOffset());
    }
    // ... then from all the named annotation sets.
    Map<String, AnnotationSet> namedAnnotSets = doc.getNamedAnnotationSets();
    if (namedAnnotSets != null) {
        for (AnnotationSet annotSet : namedAnnotSets.values()) {
            for (Annotation annot : annotSet) {
                offsetsSet.add(annot.getStartNode().getOffset());
                offsetsSet.add(annot.getEndNode().getOffset());
            }
        }
    }
    // Without any node there is nothing to interleave: only escape the text.
    if (offsetsSet.isEmpty()) {
        return replaceCharsWithEntities(aText).toString();
    }
    // The output buffer never leaves this method, so the unsynchronized
    // builder is sufficient.
    StringBuilder modifiedBuffer = new StringBuilder(textWithNodes.length() * 2);
    // index of the next character of the filtered text still to be copied
    int lastCharacterCopied = 0;
    // Walk, in ascending order, every position where either a node or an
    // entity has to be emitted.
    Set<Long> allOffsets = new TreeSet<Long>(offsetsSet);
    allOffsets.addAll(offsets2CharsMap.keySet());
    for (Long nextOffset : allOffsets) {
        int nextOffsetInt = nextOffset.intValue();
        // is there some text to add since last time?
        if (nextOffsetInt > lastCharacterCopied) {
            modifiedBuffer.append(textWithNodes.substring(lastCharacterCopied, nextOffsetInt));
            lastCharacterCopied = nextOffsetInt;
        }
        // do we need to add a node information here?
        if (offsetsSet.contains(nextOffset))
            modifiedBuffer.append("<Node id=\"").append(nextOffsetInt).append("\"/>");
        // do we need to convert an XML entity?
        if (offsets2CharsMap.containsKey(nextOffset)) {
            String entityString = entitiesMap.get(offsets2CharsMap.get(nextOffset));
            // skip the character in the original String; the entity stands
            // in for it
            lastCharacterCopied++;
            modifiedBuffer.append(entityString);
        }
    }
    // copy the remaining text
    modifiedBuffer.append(textWithNodes.substring(lastCharacterCopied, textWithNodes.length()));
    return modifiedBuffer.toString();
}
Also used : AnnotationSet(gate.AnnotationSet) TreeMap(java.util.TreeMap) Annotation(gate.Annotation) TreeSet(java.util.TreeSet)

Aggregations

Annotation (gate.Annotation)69 AnnotationSet (gate.AnnotationSet)28 ArrayList (java.util.ArrayList)24 HashMap (java.util.HashMap)15 Node (gate.Node)10 HashSet (java.util.HashSet)10 List (java.util.List)10 FeatureMap (gate.FeatureMap)8 Map (java.util.Map)8 TreeSet (java.util.TreeSet)8 Document (gate.Document)7 InvalidOffsetException (gate.util.InvalidOffsetException)7 Point (java.awt.Point)6 LinkedList (java.util.LinkedList)5 Set (java.util.Set)5 StatusListener (gate.event.StatusListener)4 GateRuntimeException (gate.util.GateRuntimeException)3 Color (java.awt.Color)3 Stack (java.util.Stack)3 TreeMap (java.util.TreeMap)3