Search in sources:

Example 26 with FeatureMap

use of gate.FeatureMap in project gate-core by GateNLP.

the class TestCreole method testArbitraryMetadata.

// testDefaultRun()
/**
 * Checks that an arbitrary metadata element is available as a feature on
 * the resource data obtained for {@code gate.corpora.DocumentImpl}.
 * <p>
 * The test fails if no resource data is found for the document class, or if
 * the {@code FUNKY-METADATA-THAING} feature is missing or is not equal to
 * {@code "hubba hubba"}.
 */
public void testArbitraryMetadata() throws Exception {
    ResourceData documentData = reg.get("gate.corpora.DocumentImpl");
    assertNotNull("testArbitraryMetadata: couldn't find doc res data", documentData);

    // the feature value is expected to be a String; a missing feature yields null
    String metadataValue = (String) documentData.getFeatures().get("FUNKY-METADATA-THAING");

    // constant-first equals is null-safe, so a missing feature simply fails the check
    boolean matchesExpected = "hubba hubba".equals(metadataValue);
    assertTrue("testArbitraryMetadata: incorrect FUNKY-METADATA-THAING on document", matchesExpected);
}
Also used : FeatureMap(gate.FeatureMap)

Example 27 with FeatureMap

use of gate.FeatureMap in project gate-core by GateNLP.

the class TikaFormat method setDocumentFeatures.

/**
 * Transfers selected Tika metadata entries onto the features of the given
 * document.
 * <p>
 * The title, author, comments and creator properties are handed to
 * {@code setTikaFeature}; afterwards an {@code AUTHORS} feature is derived
 * from {@code AUTHOR} when only the latter is present, and the metadata
 * content type is stored under {@code MimeType}.
 *
 * @param metadata the Tika metadata to read from
 * @param doc the document whose feature map is updated in place
 */
private void setDocumentFeatures(Metadata metadata, Document doc) {
    FeatureMap fmap = doc.getFeatures();
    setTikaFeature(metadata, TikaCoreProperties.TITLE, fmap);
    setTikaFeature(metadata, Office.AUTHOR, fmap);
    setTikaFeature(metadata, TikaCoreProperties.COMMENTS, fmap);
    setTikaFeature(metadata, TikaCoreProperties.CREATOR, fmap);

    // Mirror AUTHOR into AUTHORS when AUTHORS is absent. The value is read
    // under the same "AUTHOR" string key that the guard tests; looking it up
    // under the Office.AUTHOR property object would use a different map key
    // and could store null even though the guard succeeded.
    // NOTE(review): assumes setTikaFeature stores features under string keys
    // such as "AUTHOR" - confirm against its implementation.
    Object author = fmap.get("AUTHOR");
    if (fmap.get("AUTHORS") == null && author != null) {
        fmap.put("AUTHORS", author);
    }

    fmap.put("MimeType", metadata.get(Metadata.CONTENT_TYPE));
}
Also used : FeatureMap(gate.FeatureMap)

Example 28 with FeatureMap

use of gate.FeatureMap in project gate-core by GateNLP.

the class DocumentStaxUtils method readFeatureMap.

/**
 * Builds a feature map from the children of the element the reader is
 * currently positioned on (a GateDocumentFeatures or Annotation element).
 * Every child must be a Feature element, which in turn may only contain
 * Name and Value children. On return the reader is positioned on the
 * closing tag of the GateDocumentFeatures or Annotation element.
 *
 * @param xsr the reader to consume Feature elements from
 * @return the feature map populated from the Feature elements read
 * @throws XMLStreamException if a child is not a Feature element, or a
 *           Feature element contains something other than Name and Value
 */
public static FeatureMap readFeatureMap(XMLStreamReader xsr) throws XMLStreamException {
    FeatureMap result = Factory.newFeatureMap();
    // one iteration per Feature element; stops at the enclosing closing tag
    for (int outerEvent = xsr.nextTag(); outerEvent == XMLStreamConstants.START_ELEMENT; outerEvent = xsr.nextTag()) {
        xsr.require(XMLStreamConstants.START_ELEMENT, null, "Feature");
        Object name = null;
        Object value = null;
        // one iteration per child of the Feature; stops at </Feature>
        for (int innerEvent = xsr.nextTag(); innerEvent == XMLStreamConstants.START_ELEMENT; innerEvent = xsr.nextTag()) {
            String childTag = xsr.getLocalName();
            if ("Name".equals(childTag)) {
                name = readFeatureNameOrValue(xsr);
            } else if ("Value".equals(childTag)) {
                value = readFeatureNameOrValue(xsr);
            } else {
                throw new XMLStreamException("Feature element should contain only Name and Value children", xsr.getLocation());
            }
        }
        // a Feature lacking a Name or Value child is stored with null in that position
        result.put(name, value);
    }
    return result;
}
Also used : FeatureMap(gate.FeatureMap) XMLStreamException(javax.xml.stream.XMLStreamException)

Example 29 with FeatureMap

use of gate.FeatureMap in project gate-core by GateNLP.

the class DocumentStaxUtils method readGateXmlDocument.

/**
 * Parses GATE XML format data from the supplied reader into the given
 * document, replacing the document's content, annotation sets, relation
 * sets and features.
 * <p>
 * On entry the reader must be positioned on the opening GateDocument tag
 * (the last event was a START_ELEMENT whose local name is "GateDocument").
 * On normal return it is positioned on the matching closing tag. If an
 * {@link XMLStreamException} or {@link RuntimeException} occurs after the
 * document content has been replaced, the previous content is put back
 * before the exception is rethrown.
 *
 * @param xsr the source of the XML to parse
 * @param doc the document to update
 * @param statusListener optional status listener to receive status
 *          messages; may be null
 * @throws XMLStreamException if the XML does not have the expected
 *           structure or cannot be read
 */
public static void readGateXmlDocument(XMLStreamReader xsr, Document doc, StatusListener statusListener) throws XMLStreamException {
    final boolean reportStatus = (statusListener != null);

    // precondition: we must be on <GateDocument>
    xsr.require(XMLStreamConstants.START_ELEMENT, null, "GateDocument");

    // document features come first
    xsr.nextTag();
    xsr.require(XMLStreamConstants.START_ELEMENT, null, "GateDocumentFeatures");
    if (reportStatus) {
        statusListener.statusChanged("Reading document features");
    }
    FeatureMap parsedFeatures = readFeatureMap(xsr);

    // then the text, collecting node ID -> offset mappings along the way
    xsr.nextTag();
    xsr.require(XMLStreamConstants.START_ELEMENT, null, "TextWithNodes");
    Map<Integer, Long> offsetsByNodeId = new HashMap<Integer, Long>();
    if (reportStatus) {
        statusListener.statusChanged("Reading document content");
    }
    String text = readTextWithNodes(xsr, offsetsByNodeId);

    // remember the current content so it can be restored on failure
    final DocumentContent originalContent = doc.getContent();
    doc.setContent(new DocumentContentImpl(text));
    try {
        int processedCount = 0;
        Integer highestId = null;
        // null until we learn whether annotation IDs are required or not
        Boolean idsRequired = null;

        int event = xsr.nextTag();

        // annotation sets, resolved against the node map built above
        while (event == XMLStreamConstants.START_ELEMENT && xsr.getLocalName().equals("AnnotationSet")) {
            xsr.require(XMLStreamConstants.START_ELEMENT, null, "AnnotationSet");
            String setName = xsr.getAttributeValue(null, "Name");
            boolean named = (setName != null);
            if (reportStatus) {
                statusListener.statusChanged(named
                        ? "Reading \"" + setName + "\" annotation set"
                        : "Reading default annotation set");
            }
            AnnotationSet targetSet = named ? doc.getAnnotations(setName) : doc.getAnnotations();
            targetSet.clear();

            SortedSet<Integer> idsRead = new TreeSet<Integer>();
            idsRequired = readAnnotationSet(xsr, targetSet, offsetsByNodeId, idsRead, idsRequired);
            if (!idsRead.isEmpty()) {
                Integer largest = idsRead.last();
                if (highestId == null || largest.intValue() > highestId.intValue()) {
                    highestId = largest;
                }
            }
            processedCount += idsRead.size();

            // readAnnotationSet leaves the reader on </AnnotationSet>, so the
            // next tag is another <AnnotationSet>, a <RelationSet>, or
            // </GateDocument>
            event = xsr.nextTag();
        }

        // relation sets follow the annotation sets
        while (event == XMLStreamConstants.START_ELEMENT && xsr.getLocalName().equals("RelationSet")) {
            xsr.require(XMLStreamConstants.START_ELEMENT, null, "RelationSet");
            String ownerName = xsr.getAttributeValue(null, "Name");
            boolean named = (ownerName != null);
            if (reportStatus) {
                statusListener.statusChanged(named
                        ? "Reading relation set for \"" + ownerName + "\" annotation set"
                        : "Reading relation set for default annotation set");
            }
            RelationSet targetRelations = named
                    ? doc.getAnnotations(ownerName).getRelations()
                    : doc.getAnnotations().getRelations();

            SortedSet<Integer> idsRead = new TreeSet<Integer>();
            readRelationSet(xsr, targetRelations, idsRead);
            if (!idsRead.isEmpty()) {
                Integer largest = idsRead.last();
                if (highestId == null || largest.intValue() > highestId.intValue()) {
                    highestId = largest;
                }
            }
            processedCount += idsRead.size();

            // readRelationSet leaves the reader on </RelationSet>, so the
            // next tag is another <RelationSet> or </GateDocument>
            event = xsr.nextTag();
        }

        // we must now be on the closing </GateDocument>
        xsr.require(XMLStreamConstants.END_ELEMENT, null, "GateDocument");
        doc.setFeatures(parsedFeatures);

        // advance the ID generator past the highest ID seen, if doc is a DocumentImpl
        if (highestId != null && doc instanceof DocumentImpl) {
            ((DocumentImpl) doc).setNextAnnotationId(highestId.intValue() + 1);
        }
        if (reportStatus) {
            statusListener.statusChanged("Finished.  " + processedCount + " annotation(s) processed");
        }
    } catch (XMLStreamException | RuntimeException failure) {
        // put back the content the document had before parsing began
        doc.setContent(originalContent);
        throw failure;
    }
}
Also used : HashMap(java.util.HashMap) AnnotationSet(gate.AnnotationSet) FeatureMap(gate.FeatureMap) GateRuntimeException(gate.util.GateRuntimeException) XMLStreamException(javax.xml.stream.XMLStreamException) DocumentContent(gate.DocumentContent) TreeSet(java.util.TreeSet) RelationSet(gate.relations.RelationSet)

Example 30 with FeatureMap

use of gate.FeatureMap in project gate-core by GateNLP.

the class DocumentStaxUtils method readXcesFeatureMap.

/**
 * Builds a feature map from the feat children of a struct element. Each
 * feat element contributes one entry taken from its name and value
 * attributes. On return the reader is positioned on the closing struct
 * tag.
 *
 * @param xsr the reader to consume feat elements from
 * @return the feature map populated from the feat elements read
 * @throws XMLStreamException if a child is not a feat element in the XCES
 *           namespace, or is not immediately followed by its closing tag
 */
public static FeatureMap readXcesFeatureMap(XMLStreamReader xsr) throws XMLStreamException {
    FeatureMap features = Factory.newFeatureMap();
    // one iteration per <feat>; the loop ends when the next tag is a closing tag
    for (int event = xsr.nextTag(); event == XMLStreamConstants.START_ELEMENT; event = xsr.nextTag()) {
        xsr.require(XMLStreamConstants.START_ELEMENT, XCES_NAMESPACE, "feat");
        String name = xsr.getAttributeValue(null, "name");
        Object value = xsr.getAttributeValue(null, "value");
        features.put(name, value);
        // consume the (possibly virtual) closing tag of the feat element
        xsr.nextTag();
        xsr.require(XMLStreamConstants.END_ELEMENT, XCES_NAMESPACE, "feat");
    }
    return features;
}
Also used : FeatureMap(gate.FeatureMap)

Aggregations

FeatureMap (gate.FeatureMap)55 Document (gate.Document)15 URL (java.net.URL)14 ResourceInstantiationException (gate.creole.ResourceInstantiationException)11 File (java.io.File)10 Resource (gate.Resource)8 GateRuntimeException (gate.util.GateRuntimeException)7 ArrayList (java.util.ArrayList)7 List (java.util.List)7 PersistenceException (gate.persist.PersistenceException)6 Annotation (gate.Annotation)5 AnnotationSet (gate.AnnotationSet)5 DataStore (gate.DataStore)5 LanguageResource (gate.LanguageResource)5 TestDocument (gate.corpora.TestDocument)4 ResourceData (gate.creole.ResourceData)4 SerialDataStore (gate.persist.SerialDataStore)4 InvalidOffsetException (gate.util.InvalidOffsetException)4 Corpus (gate.Corpus)3 ProcessingResource (gate.ProcessingResource)3