Search in sources :

Example 26 with FeatureMap

use of gate.FeatureMap in project gate-core by GateNLP.

the class TestCreole method testArbitraryMetadata.

// testDefaultRun()
/**
 * Test arbitrary metadata elements on resources
 */
public void testArbitraryMetadata() throws Exception {
    ResourceData docRd = reg.get("gate.corpora.DocumentImpl");
    assertNotNull("testArbitraryMetadata: couldn't find doc res data", docRd);
    FeatureMap features = docRd.getFeatures();
    String comment = (String) features.get("FUNKY-METADATA-THAING");
    assertTrue("testArbitraryMetadata: incorrect FUNKY-METADATA-THAING on document", comment != null && comment.equals("hubba hubba"));
Also used : FeatureMap(gate.FeatureMap)

Example 27 with FeatureMap

use of gate.FeatureMap in project gate-core by GateNLP.

the class TikaFormat method setDocumentFeatures.

private void setDocumentFeatures(Metadata metadata, Document doc) {
    FeatureMap fmap = doc.getFeatures();
    setTikaFeature(metadata, TikaCoreProperties.TITLE, fmap);
    setTikaFeature(metadata, Office.AUTHOR, fmap);
    setTikaFeature(metadata, TikaCoreProperties.COMMENTS, fmap);
    setTikaFeature(metadata, TikaCoreProperties.CREATOR, fmap);
    if (fmap.get("AUTHORS") == null && fmap.get("AUTHOR") != null)
        fmap.put("AUTHORS", fmap.get(Office.AUTHOR));
    fmap.put("MimeType", metadata.get(Metadata.CONTENT_TYPE));
Also used : FeatureMap(gate.FeatureMap)

Example 28 with FeatureMap

use of gate.FeatureMap in project gate-core by GateNLP.

the class DocumentStaxUtils method readFeatureMap.

/**
 * Processes a GateDocumentFeatures or Annotation element to build a
 * feature map. The element is expected to contain Feature children,
 * each with a Name and Value. The reader will be returned positioned
 * on the closing GateDocumentFeatures or Annotation tag.
 * @throws XMLStreamException if the XML does not have the expected structure
 */
public static FeatureMap readFeatureMap(XMLStreamReader xsr) throws XMLStreamException {
    FeatureMap fm = Factory.newFeatureMap();
    while (xsr.nextTag() == XMLStreamConstants.START_ELEMENT) {
        xsr.require(XMLStreamConstants.START_ELEMENT, null, "Feature");
        Object featureName = null;
        Object featureValue = null;
        while (xsr.nextTag() == XMLStreamConstants.START_ELEMENT) {
            if ("Name".equals(xsr.getLocalName())) {
                featureName = readFeatureNameOrValue(xsr);
            } else if ("Value".equals(xsr.getLocalName())) {
                featureValue = readFeatureNameOrValue(xsr);
            } else {
                throw new XMLStreamException("Feature element should contain " + "only Name and Value children", xsr.getLocation());
        fm.put(featureName, featureValue);
    return fm;
Also used : FeatureMap(gate.FeatureMap) XMLStreamException(javax.xml.stream.XMLStreamException)

Example 29 with FeatureMap

use of gate.FeatureMap in project gate-core by GateNLP.

the class DocumentStaxUtils method readGateXmlDocument.

/**
 * Reads GATE XML format data from the given XMLStreamReader and puts
 * the content and annotation sets into the given Document, replacing
 * its current content. The reader must be positioned on the opening
 * GateDocument tag (i.e. the last event was a START_ELEMENT for which
 * getLocalName returns "GateDocument"), and when the method returns
 * the reader will be left positioned on the corresponding closing
 * tag.
 * @param xsr the source of the XML to parse
 * @param doc the document to update
 * @param statusListener optional status listener to receive status
 *          messages
 * @throws XMLStreamException if the stream cannot be parsed or does not
 *           have the expected structure
 */
public static void readGateXmlDocument(XMLStreamReader xsr, Document doc, StatusListener statusListener) throws XMLStreamException {
    DocumentContent savedContent = null;
    // check the precondition
    xsr.require(XMLStreamConstants.START_ELEMENT, null, "GateDocument");
    // process the document features
    xsr.require(XMLStreamConstants.START_ELEMENT, null, "GateDocumentFeatures");
    if (statusListener != null) {
        statusListener.statusChanged("Reading document features");
    FeatureMap documentFeatures = readFeatureMap(xsr);
    // read document text, building the map of node IDs to offsets
    xsr.require(XMLStreamConstants.START_ELEMENT, null, "TextWithNodes");
    Map<Integer, Long> nodeIdToOffsetMap = new HashMap<Integer, Long>();
    if (statusListener != null) {
        statusListener.statusChanged("Reading document content");
    String documentText = readTextWithNodes(xsr, nodeIdToOffsetMap);
    // save the content, in case anything goes wrong later
    savedContent = doc.getContent();
    // set the document content to the text with nodes text.
    doc.setContent(new DocumentContentImpl(documentText));
    try {
        int numAnnots = 0;
        // process annotation sets, using the node map built above
        Integer maxAnnotId = null;
        // initially, we don't know whether annotation IDs are required or
        // not
        Boolean requireAnnotationIds = null;
        int eventType = xsr.nextTag();
        while (eventType == XMLStreamConstants.START_ELEMENT && xsr.getLocalName().equals("AnnotationSet")) {
            xsr.require(XMLStreamConstants.START_ELEMENT, null, "AnnotationSet");
            String annotationSetName = xsr.getAttributeValue(null, "Name");
            AnnotationSet annotationSet = null;
            if (annotationSetName == null) {
                if (statusListener != null) {
                    statusListener.statusChanged("Reading default annotation set");
                annotationSet = doc.getAnnotations();
            } else {
                if (statusListener != null) {
                    statusListener.statusChanged("Reading \"" + annotationSetName + "\" annotation set");
                annotationSet = doc.getAnnotations(annotationSetName);
            SortedSet<Integer> annotIdsInSet = new TreeSet<Integer>();
            requireAnnotationIds = readAnnotationSet(xsr, annotationSet, nodeIdToOffsetMap, annotIdsInSet, requireAnnotationIds);
            if (annotIdsInSet.size() > 0 && (maxAnnotId == null || annotIdsInSet.last().intValue() > maxAnnotId.intValue())) {
                maxAnnotId = annotIdsInSet.last();
            numAnnots += annotIdsInSet.size();
            // readAnnotationSet leaves reader positioned on the
            // </AnnotationSet> tag, so nextTag takes us to either the next
            // <AnnotationSet>, a <RelationSet>, or </GateDocument>
            eventType = xsr.nextTag();
        while (eventType == XMLStreamConstants.START_ELEMENT && xsr.getLocalName().equals("RelationSet")) {
            xsr.require(XMLStreamConstants.START_ELEMENT, null, "RelationSet");
            String relationSetName = xsr.getAttributeValue(null, "Name");
            RelationSet relations = null;
            if (relationSetName == null) {
                if (statusListener != null) {
                    statusListener.statusChanged("Reading relation set for default annotation set");
                relations = doc.getAnnotations().getRelations();
            } else {
                if (statusListener != null) {
                    statusListener.statusChanged("Reading relation set for \"" + relationSetName + "\" annotation set");
                relations = doc.getAnnotations(relationSetName).getRelations();
            SortedSet<Integer> relIdsInSet = new TreeSet<Integer>();
            readRelationSet(xsr, relations, relIdsInSet);
            if (relIdsInSet.size() > 0 && (maxAnnotId == null || relIdsInSet.last().intValue() > maxAnnotId.intValue())) {
                maxAnnotId = relIdsInSet.last();
            numAnnots += relIdsInSet.size();
            // readAnnotationSet leaves reader positioned on the
            // </RelationSet> tag, so nextTag takes us to either the next
            // <RelationSet> or to the </GateDocument>
            eventType = xsr.nextTag();
        // check we are on the end document tag
        xsr.require(XMLStreamConstants.END_ELEMENT, null, "GateDocument");
        // set the ID generator, if doc is a DocumentImpl
        if (doc instanceof DocumentImpl && maxAnnotId != null) {
            ((DocumentImpl) doc).setNextAnnotationId(maxAnnotId.intValue() + 1);
        if (statusListener != null) {
            statusListener.statusChanged("Finished.  " + numAnnots + " annotation(s) processed");
    }// in case of exception, reset document content to the unparsed XML
     catch (XMLStreamException xse) {
        throw xse;
    } catch (RuntimeException re) {
        throw re;
Also used : HashMap(java.util.HashMap) AnnotationSet(gate.AnnotationSet) FeatureMap(gate.FeatureMap) GateRuntimeException(gate.util.GateRuntimeException) XMLStreamException(javax.xml.stream.XMLStreamException) DocumentContent(gate.DocumentContent) TreeSet(java.util.TreeSet) RelationSet(gate.relations.RelationSet)

Example 30 with FeatureMap

use of gate.FeatureMap in project gate-core by GateNLP.

the class DocumentStaxUtils method readXcesFeatureMap.

/**
 * Processes a struct element to build a feature map. The element is
 * expected to contain feat children, each with name and value
 * attributes. The reader will be returned positioned on the closing
 * struct tag.
 * @throws XMLStreamException if the XML does not have the expected structure
 */
public static FeatureMap readXcesFeatureMap(XMLStreamReader xsr) throws XMLStreamException {
    FeatureMap fm = Factory.newFeatureMap();
    while (xsr.nextTag() == XMLStreamConstants.START_ELEMENT) {
        xsr.require(XMLStreamConstants.START_ELEMENT, XCES_NAMESPACE, "feat");
        String featureName = xsr.getAttributeValue(null, "name");
        Object featureValue = xsr.getAttributeValue(null, "value");
        fm.put(featureName, featureValue);
        // read the (possibly virtual) closing tag of the feat element
        xsr.require(XMLStreamConstants.END_ELEMENT, XCES_NAMESPACE, "feat");
    return fm;
Also used : FeatureMap(gate.FeatureMap)


FeatureMap (gate.FeatureMap)55 Document (gate.Document)15 URL (java.net.URL) ResourceInstantiationException (gate.creole.ResourceInstantiationException)11 File (java.io.File) Resource (gate.Resource)8 GateRuntimeException (gate.util.GateRuntimeException)7 ArrayList (java.util.ArrayList)7 List (java.util.List)7 PersistenceException (gate.persist.PersistenceException)6 Annotation (gate.Annotation)5 AnnotationSet (gate.AnnotationSet)5 DataStore (gate.DataStore)5 LanguageResource (gate.LanguageResource)5 TestDocument (gate.corpora.TestDocument)4 ResourceData (gate.creole.ResourceData)4 SerialDataStore (gate.persist.SerialDataStore)4 InvalidOffsetException (gate.util.InvalidOffsetException)4 Corpus (gate.Corpus)3 ProcessingResource (gate.ProcessingResource)3