Search in sources :

Example 41 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class DocumentJsonUtils method writeDocument.

/**
 * Write a substring of a GATE document to the specified
 * JsonGenerator. The specified window of document text will be
 * written as a property named "text" and the specified annotations
 * will be written as "entities", with their offsets adjusted to be
 * relative to the specified window.
 *
 * @param doc the document to write
 * @param start the start offset of the segment to write
 * @param end the end offset of the segment to write
 * @param annotationsMap the annotations to write, keyed by the name
 *          under which each collection is written inside the
 *          "entities" object. Every key produces one JSON array with
 *          one object per annotation.
 * @param extraFeatures additional properties to add to the generated
 *          JSON. If the map includes a "text" key this will be
 *          ignored, and if it contains a key "entities" whose value
 *          is a map then these entities will be merged with the
 *          generated ones derived from the annotationsMap. This would
 *          typically be used for documents that were originally
 *          derived from Twitter data, to re-create the original JSON.
 *          May be null.
 * @param annotationTypeProperty if non-null, the annotation type will
 *          be written as a property under this name, as if it were an
 *          additional feature of each annotation. A real feature with
 *          the same name is not written.
 * @param annotationIDProperty if non-null, the annotation ID will
 *          be written as a property under this name, as if it were an
 *          additional feature of each annotation. A real feature with
 *          the same name is not written.
 * @param json the {@link JsonGenerator} to write to.
 * @throws JsonGenerationException if a problem occurs while
 *           generating the JSON
 * @throws IOException if an I/O error occurs.
 * @throws InvalidOffsetException propagated from fetching the
 *           document content between start and end.
 */
public static void writeDocument(Document doc, Long start, Long end, Map<String, Collection<Annotation>> annotationsMap, Map<?, ?> extraFeatures, String annotationTypeProperty, String annotationIDProperty, JsonGenerator json) throws JsonGenerationException, IOException, InvalidOffsetException {
    ObjectWriter writer = MAPPER.writer();
    json.writeStartObject();
    // NOTE(review): escape() presumably records in repos how positions
    // in the escaped text map to the unescaped text -- confirm against
    // the escape() implementation.
    RepositioningInfo repos = new RepositioningInfo();
    String text = escape(doc.getContent().getContent(start, end).toString(), repos);
    json.writeStringField("text", text);
    json.writeFieldName("entities");
    json.writeStartObject();
    // if the extraFeatures already includes entities, merge them with
    // the new ones we create
    Object entitiesExtraFeature = (extraFeatures == null) ? null : extraFeatures.get("entities");
    Map<?, ?> entitiesMap = null;
    if (entitiesExtraFeature instanceof Map) {
        entitiesMap = (Map<?, ?>) entitiesExtraFeature;
    }
    for (Map.Entry<String, Collection<Annotation>> annsByType : annotationsMap.entrySet()) {
        String annotationType = annsByType.getKey();
        Collection<Annotation> annotations = annsByType.getValue();
        json.writeFieldName(annotationType);
        json.writeStartArray();
        for (Annotation a : annotations) {
            json.writeStartObject();
            // indices:[start, end], corrected to match the sub-range of
            // text we're writing
            json.writeArrayFieldStart("indices");
            json.writeNumber(repos.getOriginalPos(a.getStartNode().getOffset() - start, true));
            json.writeNumber(repos.getOriginalPos(a.getEndNode().getOffset() - start, false));
            // end of indices
            json.writeEndArray();
            if (annotationTypeProperty != null) {
                json.writeStringField(annotationTypeProperty, a.getType());
            }
            if (annotationIDProperty != null) {
                json.writeNumberField(annotationIDProperty, a.getId());
            }
            // other features
            for (Map.Entry<?, ?> feature : a.getFeatures().entrySet()) {
                Object featureName = feature.getKey();
                boolean clashesWithType = annotationTypeProperty != null && annotationTypeProperty.equals(featureName);
                boolean clashesWithId = annotationIDProperty != null && annotationIDProperty.equals(featureName);
                if (clashesWithType || clashesWithId) {
                    // the synthetic type/ID property has already been
                    // written under this name; writing the feature as
                    // well would emit the same key twice in one object
                    continue;
                }
                json.writeFieldName(String.valueOf(featureName));
                writer.writeValue(json, feature.getValue());
            }
            // end of annotation
            json.writeEndObject();
        }
        // add any entities from the extraFeatures map
        Object extraEntities = (entitiesMap == null) ? null : entitiesMap.get(annotationType);
        if (extraEntities instanceof Collection) {
            for (Object ent : (Collection<?>) extraEntities) {
                writer.writeValue(json, ent);
            }
        }
        json.writeEndArray();
    }
    if (entitiesMap != null) {
        for (Map.Entry<?, ?> entitiesEntry : entitiesMap.entrySet()) {
            if (!annotationsMap.containsKey(entitiesEntry.getKey())) {
                // not an entity type we've already seen
                json.writeFieldName(String.valueOf(entitiesEntry.getKey()));
                writer.writeValue(json, entitiesEntry.getValue());
            }
        }
    }
    // end of entities
    json.writeEndObject();
    if (extraFeatures != null) {
        for (Map.Entry<?, ?> feature : extraFeatures.entrySet()) {
            if ("text".equals(feature.getKey()) || "entities".equals(feature.getKey())) {
                // already dealt with text and entities
                continue;
            }
            json.writeFieldName(String.valueOf(feature.getKey()));
            writer.writeValue(json, feature.getValue());
        }
    }
    // end of document
    json.writeEndObject();
    // Make sure that everything we have generated is flushed to the
    // underlying OutputStream. It seems that not doing this can easily
    // lead to corrupt files that just end in the middle of a JSON
    // object. This occurs even if you flush the OutputStream instance
    // as the data never leaves the JsonGenerator
    json.flush();
}
Also used : ObjectWriter(com.fasterxml.jackson.databind.ObjectWriter) Collection(java.util.Collection) Map(java.util.Map) Annotation(gate.Annotation)

Example 42 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class DocumentXmlUtils method annotationSetToXml.

// buildEntityMapFromString();
/**
 * Serialises an annotation set as an {@code <AnnotationSet>} XML element
 * and appends the result to the given buffer.
 * <p>
 * A {@code null} set, like an unnamed one, is written with a plain
 * {@code <AnnotationSet>} opening tag; a {@code null} set additionally has
 * no child elements. A named set carries its name in a {@code Name}
 * attribute. Each annotation becomes an {@code <Annotation>} element with
 * {@code Id}, {@code Type}, {@code StartNode} and {@code EndNode}
 * attributes, followed by whatever {@code featuresToXml} returns for its
 * features.
 *
 * @param anAnnotationSet
 *          the annotation set to serialise; may be {@code null}
 * @param buffer
 *          the StringBuffer that receives the XML text
 */
public static void annotationSetToXml(AnnotationSet anAnnotationSet, StringBuffer buffer) {
    String setName = (anAnnotationSet == null) ? null : anAnnotationSet.getName();
    // opening tag: the Name attribute is only present for named sets
    if (setName == null) {
        buffer.append("<AnnotationSet>\n");
    } else {
        buffer.append("<AnnotationSet Name=\"").append(setName).append("\" >\n");
    }
    if (anAnnotationSet != null) {
        // one map instance is handed to every featuresToXml call for this set
        Map<String, StringBuffer> keyCache = new HashMap<String, StringBuffer>();
        for (Annotation current : anAnnotationSet) {
            buffer.append("<Annotation Id=\"").append(current.getId());
            buffer.append("\" Type=\"").append(current.getType());
            buffer.append("\" StartNode=\"").append(current.getStartNode().getOffset());
            buffer.append("\" EndNode=\"").append(current.getEndNode().getOffset());
            buffer.append("\">\n");
            buffer.append(featuresToXml(current.getFeatures(), keyCache));
            buffer.append("</Annotation>\n");
        }
    }
    buffer.append("</AnnotationSet>\n");
}
Also used : HashMap(java.util.HashMap) Annotation(gate.Annotation)

Example 43 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class AnnotationSetImpl method readObject.

/**
 * Custom deserialisation hook. Restores the name, document and annotations
 * of this set from the stream, handling both the current layout (an
 * {@code annotations} array followed by two index flags) and the old
 * layout (an {@code annotsById} map), then re-adds every annotation via
 * {@code add} and finally restores the relations.
 *
 * @param in the stream to read the serialised state from
 * @throws IOException if reading fails, or if the stream contains neither
 *           the annotations array nor the map by id
 * @throws ClassNotFoundException if a serialised class cannot be resolved
 */
private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
    this.longestAnnot = 0L;
    ObjectInputStream.GetField fields = in.readFields();
    this.name = (String) fields.get("name", null);
    this.doc = (DocumentImpl) fields.get("doc", null);
    this.annotations = (Annotation[]) fields.get("annotations", null);
    // the index flags are only present in the new-style stream; for old
    // data they stay false
    boolean rebuildTypeIndex = false;
    boolean rebuildStartNodeIndex = false;
    if (this.annotations != null) {
        // new style serialised version
        rebuildTypeIndex = in.readBoolean();
        rebuildStartNodeIndex = in.readBoolean();
    } else {
        // old style serialised version: annotations were stored as a map by id
        @SuppressWarnings("unchecked") Map<Integer, Annotation> legacyById = (Map<Integer, Annotation>) fields.get("annotsById", null);
        if (legacyById == null) {
            throw new IOException("Invalid serialised data: neither annotations array or map by id" + " are present.");
        }
        annotations = legacyById.values().toArray(new Annotation[] {});
    }
    this.annotsById = new HashMap<Integer, Annotation>(annotations.length);
    // create empty index structures for whichever indices were requested
    if (rebuildTypeIndex) {
        annotsByType = new HashMap<String, AnnotationSet>(Gate.HASH_STH_SIZE);
    }
    if (rebuildStartNodeIndex) {
        nodesByOffset = new RBTreeMap<Long, Node>();
        annotsByStartNode = new HashMap<Integer, Object>(annotations.length);
    }
    // re-add the annotations one at a time
    for (Annotation restored : annotations) {
        add(restored);
    }
    this.relations = (RelationSet) fields.get("relations", null);
    // the array was only needed while restoring
    annotations = null;
}
Also used : Node(gate.Node) AnnotationSet(gate.AnnotationSet) IOException(java.io.IOException) Annotation(gate.Annotation) HashMap(java.util.HashMap) Map(java.util.Map) FeatureMap(gate.FeatureMap) RBTreeMap(gate.util.RBTreeMap) ObjectInputStream(java.io.ObjectInputStream)

Example 44 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class AnnotationSetImpl method get.

// get(type)
/**
 * Selects all annotations whose type is one of the given types. The
 * by-type index is built first if it does not exist yet.
 *
 * @param types the annotation type names to select
 * @return the result of {@code emptyAS()} when nothing matches, otherwise
 *         a new ImmutableAnnotationSetImpl holding the matching annotations
 */
@Override
public AnnotationSet get(Set<String> types) throws ClassCastException {
    if (annotsByType == null) {
        indexByType();
    }
    List<Annotation> selected = new ArrayList<Annotation>();
    for (String wantedType : types) {
        AnnotationSet ofThatType = annotsByType.get(wantedType);
        if (ofThatType == null) {
            // no annotations of this type in the set
            continue;
        }
        for (Annotation match : ofThatType) {
            selected.add(match);
        }
    }
    return selected.isEmpty() ? emptyAS() : new ImmutableAnnotationSetImpl(doc, selected);
}
Also used : ArrayList(java.util.ArrayList) AnnotationSet(gate.AnnotationSet) Annotation(gate.Annotation)

Example 45 with Annotation

use of gate.Annotation in project gate-core by GateNLP.

the class AnnotationSetImpl method inDocumentOrder.

/**
 * Returns the annotations of this set ordered by increasing start offset,
 * i.e. in the order in which they appear in the document. The relative
 * order of annotations that start at the same offset is unspecified.
 * The positional index is created on demand if it does not exist yet.
 *
 * @return a new list of annotations ordered by increasing start offset
 */
@Override
public List<Annotation> inDocumentOrder() {
    if (annotsByStartNode == null) {
        indexByStartOffset();
    }
    List<Annotation> ordered = new ArrayList<Annotation>();
    // visit the nodes in the order supplied by the offset map and collect
    // the annotations starting at each one
    for (Node startNode : nodesByOffset.values()) {
        Collection<Annotation> startingHere = getAnnotsByStartNode(startNode.getId());
        if (startingHere == null) {
            continue;
        }
        ordered.addAll(startingHere);
    }
    return ordered;
}
Also used : Node(gate.Node) ArrayList(java.util.ArrayList) Annotation(gate.Annotation)

Aggregations

Annotation (gate.Annotation)69 AnnotationSet (gate.AnnotationSet)28 ArrayList (java.util.ArrayList)24 HashMap (java.util.HashMap)15 Node (gate.Node)10 HashSet (java.util.HashSet)10 List (java.util.List)10 FeatureMap (gate.FeatureMap)8 Map (java.util.Map)8 TreeSet (java.util.TreeSet)8 Document (gate.Document)7 InvalidOffsetException (gate.util.InvalidOffsetException)7 Point (java.awt.Point)6 LinkedList (java.util.LinkedList)5 Set (java.util.Set)5 StatusListener (gate.event.StatusListener)4 GateRuntimeException (gate.util.GateRuntimeException)3 Color (java.awt.Color)3 Stack (java.util.Stack)3 TreeMap (java.util.TreeMap)3