use of gate.Annotation in project gate-core by GateNLP.
the class DocumentJsonUtils method writeDocument.
/**
 * Write a substring of a GATE document to the specified
 * JsonGenerator. The specified window of document text will be
 * written as a property named "text" and the specified annotations
 * will be written as "entities", with their offsets adjusted to be
 * relative to the specified window.
 *
 * @param doc the document to write
 * @param start the start offset of the segment to write
 * @param end the end offset of the segment to write
 * @param annotationsMap the annotations to write. Each key is used as
 *         the name of an array under "entities" (independently of the
 *         type of the annotations it holds) and each annotation in the
 *         corresponding collection becomes one object in that array.
 * @param extraFeatures additional properties to add to the generated
 *         JSON. If the map includes a "text" key this will be
 *         ignored, and if it contains a key "entities" whose value
 *         is a map then these entities will be merged with the
 *         generated ones derived from the annotationsMap. This would
 *         typically be used for documents that were originally
 *         derived from Twitter data, to re-create the original JSON.
 *         May be null.
 * @param annotationTypeProperty if non-null, the annotation type will
 *         be written as a property under this name, as if it were an
 *         additional feature of each annotation.
 * @param annotationIDProperty if non-null, the annotation ID will
 *         be written as a property under this name, as if it were an
 *         additional feature of each annotation.
 * @param json the {@link JsonGenerator} to write to.
 * @throws JsonGenerationException if a problem occurs while
 *         generating the JSON
 * @throws IOException if an I/O error occurs.
 * @throws InvalidOffsetException declared because the document content
 *         for the window between start and end has to be looked up;
 *         presumably raised when that window is not valid for the
 *         document.
 */
public static void writeDocument(Document doc, Long start, Long end, Map<String, Collection<Annotation>> annotationsMap, Map<?, ?> extraFeatures, String annotationTypeProperty, String annotationIDProperty, JsonGenerator json) throws JsonGenerationException, IOException, InvalidOffsetException {
  ObjectWriter writer = MAPPER.writer();
  json.writeStartObject();
  // repos is handed to escape() together with the text and is consulted
  // below to translate window-relative offsets into the offsets that are
  // written out
  RepositioningInfo repos = new RepositioningInfo();
  String text = escape(doc.getContent().getContent(start, end).toString(), repos);
  json.writeStringField("text", text);
  json.writeFieldName("entities");
  json.writeStartObject();
  // if the extraFeatures already includes entities, merge them with
  // the new ones we create
  Object entitiesExtraFeature = (extraFeatures == null) ? null : extraFeatures.get("entities");
  Map<?, ?> entitiesMap = null;
  if (entitiesExtraFeature instanceof Map) {
    entitiesMap = (Map<?, ?>) entitiesExtraFeature;
  }
  for (Map.Entry<String, Collection<Annotation>> annsByType : annotationsMap.entrySet()) {
    String annotationType = annsByType.getKey();
    Collection<Annotation> annotations = annsByType.getValue();
    json.writeFieldName(annotationType);
    json.writeStartArray();
    for (Annotation a : annotations) {
      json.writeStartObject();
      // indices:[start, end], corrected to match the sub-range of
      // text we're writing
      json.writeArrayFieldStart("indices");
      json.writeNumber(repos.getOriginalPos(a.getStartNode().getOffset() - start, true));
      json.writeNumber(repos.getOriginalPos(a.getEndNode().getOffset() - start, false));
      // end of indices
      json.writeEndArray();
      if (annotationTypeProperty != null) {
        json.writeStringField(annotationTypeProperty, a.getType());
      }
      if (annotationIDProperty != null) {
        json.writeNumberField(annotationIDProperty, a.getId());
      }
      // other features
      for (Map.Entry<?, ?> feature : a.getFeatures().entrySet()) {
        String featureName = String.valueOf(feature.getKey());
        // Skip any feature whose name collides with a property that has
        // already been written for this annotation ("indices", the type
        // property or the ID property); writing it again would put a
        // duplicate key into the JSON object.
        if ("indices".equals(featureName)
            || featureName.equals(annotationTypeProperty)
            || featureName.equals(annotationIDProperty)) {
          continue;
        }
        json.writeFieldName(featureName);
        writer.writeValue(json, feature.getValue());
      }
      // end of annotation
      json.writeEndObject();
    }
    // add any entities from the extraFeatures map
    if (entitiesMap != null) {
      Object extraEntities = entitiesMap.get(annotationType);
      if (extraEntities instanceof Collection) {
        for (Object ent : (Collection<?>) extraEntities) {
          writer.writeValue(json, ent);
        }
      }
    }
    json.writeEndArray();
  }
  if (entitiesMap != null) {
    for (Map.Entry<?, ?> entitiesEntry : entitiesMap.entrySet()) {
      if (!annotationsMap.containsKey(entitiesEntry.getKey())) {
        // not an entity type we've already seen
        json.writeFieldName(String.valueOf(entitiesEntry.getKey()));
        writer.writeValue(json, entitiesEntry.getValue());
      }
    }
  }
  // end of entities
  json.writeEndObject();
  if (extraFeatures != null) {
    for (Map.Entry<?, ?> feature : extraFeatures.entrySet()) {
      if ("text".equals(feature.getKey()) || "entities".equals(feature.getKey())) {
        // already dealt with text and entities
        continue;
      }
      json.writeFieldName(String.valueOf(feature.getKey()));
      writer.writeValue(json, feature.getValue());
    }
  }
  // end of document
  json.writeEndObject();
  // Make sure that everything we have generated is flushed to the
  // underlying OutputStream. It seems that not doing this can easily
  // lead to corrupt files that just end in the middle of a JSON
  // object. This occurs even if you flush the OutputStream instance
  // as the data never leaves the JsonGenerator
  json.flush();
}
use of gate.Annotation in project gate-core by GateNLP.
the class DocumentXmlUtils method annotationSetToXml.
// buildEntityMapFromString();
/**
 * Converts the Annotation set to XML which is appended to the supplied
 * StringBuffer instance.
 * <p>
 * A null set is written as an empty, unnamed AnnotationSet element. The
 * set name and the annotation types are written as XML attribute values
 * and are escaped so that names containing markup characters still
 * produce well-formed XML.
 *
 * @param anAnnotationSet
 *          The annotation set that has to be saved as XML. May be null.
 * @param buffer
 *          the StringBuffer that the XML representation should be appended to
 */
public static void annotationSetToXml(AnnotationSet anAnnotationSet, StringBuffer buffer) {
  if (anAnnotationSet == null) {
    buffer.append("<AnnotationSet>\n");
    buffer.append("</AnnotationSet>\n");
    return;
  }
  String setName = anAnnotationSet.getName();
  if (setName == null) {
    buffer.append("<AnnotationSet>\n");
  } else {
    buffer.append("<AnnotationSet Name=\"");
    buffer.append(escapeXmlAttributeValue(setName));
    buffer.append("\" >\n");
  }
  // one map instance is shared by every featuresToXml call for this set
  // (presumably a cache of already converted feature keys)
  Map<String, StringBuffer> convertedKeys = new HashMap<String, StringBuffer>();
  // Iterate through AnnotationSet and save each Annotation as XML
  for (Annotation annot : anAnnotationSet) {
    buffer.append("<Annotation Id=\"");
    buffer.append(annot.getId());
    buffer.append("\" Type=\"");
    buffer.append(escapeXmlAttributeValue(annot.getType()));
    buffer.append("\" StartNode=\"");
    buffer.append(annot.getStartNode().getOffset());
    buffer.append("\" EndNode=\"");
    buffer.append(annot.getEndNode().getOffset());
    buffer.append("\">\n");
    buffer.append(featuresToXml(annot.getFeatures(), convertedKeys));
    buffer.append("</Annotation>\n");
  }
  buffer.append("</AnnotationSet>\n");
}

/**
 * Escapes the characters that must not appear literally inside a
 * double-quoted XML attribute value.
 *
 * @param value the raw attribute value; null is rendered as the text
 *          "null", which is what appending a null String to a
 *          StringBuffer produces
 * @return the value with {@code &}, {@code <}, {@code >} and {@code "}
 *         replaced by the corresponding predefined entities
 */
private static String escapeXmlAttributeValue(String value) {
  if (value == null) {
    return "null";
  }
  StringBuilder escaped = new StringBuilder(value.length() + 16);
  for (int i = 0; i < value.length(); i++) {
    char c = value.charAt(i);
    switch (c) {
      case '&':
        escaped.append("&amp;");
        break;
      case '<':
        escaped.append("&lt;");
        break;
      case '>':
        escaped.append("&gt;");
        break;
      case '"':
        escaped.append("&quot;");
        break;
      default:
        escaped.append(c);
    }
  }
  return escaped.toString();
}
use of gate.Annotation in project gate-core by GateNLP.
the class AnnotationSetImpl method readObject.
/**
 * Deserialisation hook: restores the name, document, annotations and
 * relations from the stream and re-adds every annotation so that the
 * by-id map (and, when requested, the type and start-node indices) are
 * rebuilt.
 * <p>
 * Two stream layouts are accepted. When the "annotations" array field is
 * present, two booleans follow the fields and say which indices have to
 * be recreated. When it is absent, the annotations are taken from the
 * "annotsById" map field and no index is recreated up front.
 *
 * @param in the stream to read the serialised state from
 * @throws IOException if reading fails, or if the stream holds neither
 *           the annotations array nor the map by id
 * @throws ClassNotFoundException if a serialised class cannot be resolved
 */
private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
  this.longestAnnot = 0L;
  ObjectInputStream.GetField fields = in.readFields();
  this.name = (String) fields.get("name", null);
  this.doc = (DocumentImpl) fields.get("doc", null);
  this.annotations = (Annotation[]) fields.get("annotations", null);
  boolean rebuildTypeIndex = false;
  boolean rebuildStartNodeIndex = false;
  if (this.annotations != null) {
    // new style serialised version: the index flags follow the fields
    rebuildTypeIndex = in.readBoolean();
    rebuildStartNodeIndex = in.readBoolean();
  } else {
    // old style serialised version: fall back to the map keyed by id
    @SuppressWarnings("unchecked") Map<Integer, Annotation> storedById = (Map<Integer, Annotation>) fields.get("annotsById", null);
    if (storedById == null) {
      throw new IOException("Invalid serialised data: neither annotations array or map by id" + " are present.");
    }
    annotations = storedById.values().toArray(new Annotation[] {});
  }
  this.annotsById = new HashMap<Integer, Annotation>(annotations.length);
  // create the empty index structures that were flagged in the stream
  if (rebuildTypeIndex) {
    annotsByType = new HashMap<String, AnnotationSet>(Gate.HASH_STH_SIZE);
  }
  if (rebuildStartNodeIndex) {
    nodesByOffset = new RBTreeMap<Long, Node>();
    annotsByStartNode = new HashMap<Integer, Object>(annotations.length);
  }
  // add all the annotations one by one
  for (Annotation restored : annotations) {
    add(restored);
  }
  this.relations = (RelationSet) fields.get("relations", null);
  // the array was only needed while restoring
  annotations = null;
}
use of gate.Annotation in project gate-core by GateNLP.
the class AnnotationSetImpl method get.
// get(type)
/**
 * Select annotations by a set of types. Expects a Set of String.
 * The index by type is created first if it does not exist yet.
 *
 * @param types the annotation types to select
 * @return the result of emptyAS() when no annotation of any requested
 *         type is found, otherwise an ImmutableAnnotationSet holding
 *         every annotation of the requested types
 */
@Override
public AnnotationSet get(Set<String> types) throws ClassCastException {
  if (annotsByType == null) {
    indexByType();
  }
  List<Annotation> matches = new ArrayList<Annotation>();
  for (String wantedType : types) {
    AnnotationSet ofType = annotsByType.get(wantedType);
    if (ofType == null) {
      // nothing indexed under this type
      continue;
    }
    for (Annotation match : ofType) {
      matches.add(match);
    }
  }
  return matches.isEmpty() ? emptyAS() : new ImmutableAnnotationSetImpl(doc, matches);
}
use of gate.Annotation in project gate-core by GateNLP.
the class AnnotationSetImpl method inDocumentOrder.
/**
 * Return a list of annotations sorted by increasing start offset, i.e. in the order
 * they appear in the document. If more than one annotation starts at a specific offset
 * the order of these annotations is unspecified.
 * <p>
 * The list is assembled by walking the values of the offset-to-node map and
 * collecting, for each node, the annotations that start at it.
 *
 * @return a list of annotations ordered by increasing start offset. If a positional
 * index does not exist, it is created.
 */
@Override
public List<Annotation> inDocumentOrder() {
  if (annotsByStartNode == null) {
    indexByStartOffset();
  }
  List<Annotation> ordered = new ArrayList<Annotation>();
  for (Node startNode : nodesByOffset.values()) {
    Collection<Annotation> startingHere = getAnnotsByStartNode(startNode.getId());
    if (startingHere == null) {
      // no annotation starts at this node
      continue;
    }
    ordered.addAll(startingHere);
  }
  return ordered;
}
Aggregations