use of gate.FeatureMap in project gate-core by GateNLP.
the class TestCreole method testArbitraryMetadata.
// testDefaultRun()
/**
 * Checks that an arbitrary metadata element is available as a feature on
 * the resource data registered for {@code gate.corpora.DocumentImpl}.
 *
 * @throws Exception if the lookup fails unexpectedly
 */
public void testArbitraryMetadata() throws Exception {
  ResourceData documentData = reg.get("gate.corpora.DocumentImpl");
  assertNotNull("testArbitraryMetadata: couldn't find doc res data", documentData);

  // the feature must be present and carry exactly the expected text
  String funkyValue = (String) documentData.getFeatures().get("FUNKY-METADATA-THAING");
  boolean matches = "hubba hubba".equals(funkyValue);
  assertTrue("testArbitraryMetadata: incorrect FUNKY-METADATA-THAING on document", matches);
}
use of gate.FeatureMap in project gate-core by GateNLP.
the class TikaFormat method setDocumentFeatures.
/**
 * Populates the feature map of the given document from Tika metadata.
 * <p>
 * The title, author, comments and creator properties are passed to
 * {@code setTikaFeature} (defined elsewhere in this class) together with
 * the document's feature map. Afterwards, if the map holds an
 * {@code "AUTHOR"} feature but no {@code "AUTHORS"} feature, the
 * {@code "AUTHOR"} value is also stored under {@code "AUTHORS"}. Finally
 * the metadata content type is stored under {@code "MimeType"}.
 *
 * @param metadata the Tika metadata to read from
 * @param doc the document whose feature map (as returned by
 *          {@code getFeatures()}) is updated
 */
private void setDocumentFeatures(Metadata metadata, Document doc) {
  FeatureMap fmap = doc.getFeatures();
  setTikaFeature(metadata, TikaCoreProperties.TITLE, fmap);
  setTikaFeature(metadata, Office.AUTHOR, fmap);
  setTikaFeature(metadata, TikaCoreProperties.COMMENTS, fmap);
  setTikaFeature(metadata, TikaCoreProperties.CREATOR, fmap);

  // Copy the value using the same String key that the guard tests. The
  // lookup previously used the Office.AUTHOR Property object as the key,
  // which is not the "AUTHOR" String key checked in the condition.
  Object author = fmap.get("AUTHOR");
  if (fmap.get("AUTHORS") == null && author != null) {
    fmap.put("AUTHORS", author);
  }

  fmap.put("MimeType", metadata.get(Metadata.CONTENT_TYPE));
}
use of gate.FeatureMap in project gate-core by GateNLP.
the class DocumentStaxUtils method readFeatureMap.
/**
 * Builds a feature map from a GateDocumentFeatures or Annotation element.
 * Every child of that element must be a Feature element, which in turn may
 * only contain Name and Value children. A Feature lacking a Name or a
 * Value contributes {@code null} for the missing part. On return the
 * reader is positioned on the closing GateDocumentFeatures or Annotation
 * tag.
 *
 * @param xsr the reader, positioned on the opening tag of the element
 *          whose Feature children are to be read
 * @return the feature map populated from the Feature children
 * @throws XMLStreamException if the reader reports an error, a child is
 *           not a Feature element, or a Feature contains a child other
 *           than Name or Value
 */
public static FeatureMap readFeatureMap(XMLStreamReader xsr) throws XMLStreamException {
  FeatureMap result = Factory.newFeatureMap();

  // outer loop: one iteration per <Feature> child
  while (xsr.nextTag() == XMLStreamConstants.START_ELEMENT) {
    xsr.require(XMLStreamConstants.START_ELEMENT, null, "Feature");
    Object name = null;
    Object value = null;

    // inner loop: the <Name> and <Value> children of this feature
    while (xsr.nextTag() == XMLStreamConstants.START_ELEMENT) {
      String childTag = xsr.getLocalName();
      if ("Name".equals(childTag)) {
        name = readFeatureNameOrValue(xsr);
        continue;
      }
      if ("Value".equals(childTag)) {
        value = readFeatureNameOrValue(xsr);
        continue;
      }
      throw new XMLStreamException("Feature element should contain " + "only Name and Value children", xsr.getLocation());
    }

    result.put(name, value);
  }
  return result;
}
use of gate.FeatureMap in project gate-core by GateNLP.
the class DocumentStaxUtils method readGateXmlDocument.
/**
 * Parses GATE XML format data from the supplied XMLStreamReader into the
 * given Document, replacing the document's current content, annotation
 * sets and features. On entry the reader must be positioned on the opening
 * GateDocument tag (i.e. the last event was a START_ELEMENT whose local
 * name is "GateDocument"); on normal return it is positioned on the
 * matching closing tag.
 * <p>
 * If an XMLStreamException or RuntimeException is raised after the
 * document content has been replaced, the previous content is put back
 * before the exception is rethrown.
 *
 * @param xsr the source of the XML to parse
 * @param doc the document to update
 * @param statusListener optional status listener to receive status
 *          messages; may be {@code null}
 * @throws XMLStreamException if the XML does not have the expected
 *           structure or the reader reports an error
 */
public static void readGateXmlDocument(XMLStreamReader xsr, Document doc, StatusListener statusListener) throws XMLStreamException {
  final boolean hasListener = statusListener != null;

  // precondition: we must be sitting on <GateDocument>
  xsr.require(XMLStreamConstants.START_ELEMENT, null, "GateDocument");

  // first child: the document features
  xsr.nextTag();
  xsr.require(XMLStreamConstants.START_ELEMENT, null, "GateDocumentFeatures");
  if (hasListener) {
    statusListener.statusChanged("Reading document features");
  }
  FeatureMap parsedFeatures = readFeatureMap(xsr);

  // second child: the text, which also yields the node ID -> offset map
  xsr.nextTag();
  xsr.require(XMLStreamConstants.START_ELEMENT, null, "TextWithNodes");
  Map<Integer, Long> offsetsByNodeId = new HashMap<Integer, Long>();
  if (hasListener) {
    statusListener.statusChanged("Reading document content");
  }
  String text = readTextWithNodes(xsr, offsetsByNodeId);

  // remember the current content so it can be restored on failure, then
  // install the freshly parsed text
  final DocumentContent previousContent = doc.getContent();
  doc.setContent(new DocumentContentImpl(text));

  try {
    // running total of IDs read from annotation sets and relation sets
    int processedCount = 0;
    // highest ID seen so far, or null if none has been seen
    Integer maxAnnotId = null;
    // null until it is known whether annotation IDs are required
    Boolean requireAnnotationIds = null;

    int eventType = xsr.nextTag();

    // zero or more <AnnotationSet> elements
    while (eventType == XMLStreamConstants.START_ELEMENT && xsr.getLocalName().equals("AnnotationSet")) {
      xsr.require(XMLStreamConstants.START_ELEMENT, null, "AnnotationSet");
      String setName = xsr.getAttributeValue(null, "Name");
      boolean isDefaultSet = (setName == null);

      if (hasListener) {
        statusListener.statusChanged(isDefaultSet
                ? "Reading default annotation set"
                : "Reading \"" + setName + "\" annotation set");
      }
      AnnotationSet targetSet = isDefaultSet ? doc.getAnnotations() : doc.getAnnotations(setName);
      targetSet.clear();

      SortedSet<Integer> idsInSet = new TreeSet<Integer>();
      requireAnnotationIds = readAnnotationSet(xsr, targetSet, offsetsByNodeId, idsInSet, requireAnnotationIds);
      if (!idsInSet.isEmpty()) {
        Integer highest = idsInSet.last();
        if (maxAnnotId == null || highest.intValue() > maxAnnotId.intValue()) {
          maxAnnotId = highest;
        }
      }
      processedCount += idsInSet.size();

      // readAnnotationSet leaves the reader on </AnnotationSet>, so the
      // next tag is another <AnnotationSet>, a <RelationSet>, or
      // </GateDocument>
      eventType = xsr.nextTag();
    }

    // zero or more <RelationSet> elements
    while (eventType == XMLStreamConstants.START_ELEMENT && xsr.getLocalName().equals("RelationSet")) {
      xsr.require(XMLStreamConstants.START_ELEMENT, null, "RelationSet");
      String ownerSetName = xsr.getAttributeValue(null, "Name");
      boolean isDefaultSet = (ownerSetName == null);

      if (hasListener) {
        statusListener.statusChanged(isDefaultSet
                ? "Reading relation set for default annotation set"
                : "Reading relation set for \"" + ownerSetName + "\" annotation set");
      }
      RelationSet targetRelations = isDefaultSet
              ? doc.getAnnotations().getRelations()
              : doc.getAnnotations(ownerSetName).getRelations();

      SortedSet<Integer> idsInSet = new TreeSet<Integer>();
      readRelationSet(xsr, targetRelations, idsInSet);
      if (!idsInSet.isEmpty()) {
        Integer highest = idsInSet.last();
        if (maxAnnotId == null || highest.intValue() > maxAnnotId.intValue()) {
          maxAnnotId = highest;
        }
      }
      processedCount += idsInSet.size();

      // readRelationSet leaves the reader on </RelationSet>, so the next
      // tag is another <RelationSet> or </GateDocument>
      eventType = xsr.nextTag();
    }

    // we must now be on the closing </GateDocument>
    xsr.require(XMLStreamConstants.END_ELEMENT, null, "GateDocument");
    doc.setFeatures(parsedFeatures);

    // for a DocumentImpl, move the ID generator past the highest ID read
    if (doc instanceof DocumentImpl && maxAnnotId != null) {
      ((DocumentImpl) doc).setNextAnnotationId(maxAnnotId.intValue() + 1);
    }
    if (hasListener) {
      statusListener.statusChanged("Finished. " + processedCount + " annotation(s) processed");
    }
  } catch (XMLStreamException xse) {
    // parsing failed: restore the content the document had beforehand
    doc.setContent(previousContent);
    throw xse;
  } catch (RuntimeException re) {
    // same recovery for unchecked failures
    doc.setContent(previousContent);
    throw re;
  }
}
use of gate.FeatureMap in project gate-core by GateNLP.
the class DocumentStaxUtils method readXcesFeatureMap.
/**
 * Builds a feature map from a struct element. Each child of the struct
 * must be a feat element in the XCES namespace; its {@code name} and
 * {@code value} attributes become one entry in the map. On return the
 * reader is positioned on the closing struct tag.
 *
 * @param xsr the reader, positioned on the opening struct tag
 * @return the feature map built from the feat children
 * @throws XMLStreamException if the reader reports an error or a child
 *           element is not an XCES feat element
 */
public static FeatureMap readXcesFeatureMap(XMLStreamReader xsr) throws XMLStreamException {
  FeatureMap result = Factory.newFeatureMap();

  for (int event = xsr.nextTag(); event == XMLStreamConstants.START_ELEMENT; event = xsr.nextTag()) {
    xsr.require(XMLStreamConstants.START_ELEMENT, XCES_NAMESPACE, "feat");
    String name = xsr.getAttributeValue(null, "name");
    Object value = xsr.getAttributeValue(null, "value");
    result.put(name, value);

    // consume the (possibly virtual) closing tag of this feat element
    xsr.nextTag();
    xsr.require(XMLStreamConstants.END_ELEMENT, XCES_NAMESPACE, "feat");
  }
  return result;
}
Aggregations