Search in sources :

Example 11 with AnnotationSet

use of gate.AnnotationSet in project gate-core by GateNLP.

the class TestClassificationMeasures method test.

/**
 * Exercises {@code ClassificationMeasures} on two pairs of annotation sets
 * and on the measures obtained by combining both results.
 *
 * For each pair the confusion matrix is computed over annotations of type
 * "sent" using the "Op" feature, and the observed agreement and the two
 * kappa values are compared with previously recorded figures.
 *
 * NOTE(review): when the test documents cannot be created the method only
 * prints a message and returns without asserting anything, so the test
 * passes in that situation. This best-effort behaviour is kept as-is;
 * confirm whether a hard failure would be preferable.
 */
public void test() {
    String type = "sent";
    String feature = "Op";
    // Tolerance used when comparing the float-valued measures with the
    // recorded reference values (well above one ulp for values below 1).
    final float delta = 1e-6f;
    Document doc1 = null;
    Document doc2 = null;
    Document doc3 = null;
    Document doc4 = null;
    try {
        Gate.init();
        // each pair is the same file loaded twice; the two annotators'
        // work is read from the "ann1" and "ann2" annotation sets below
        doc1 = Factory.newDocument(new URL(TestDocument.getTestServerName() + "tests/iaa/beijing-opera.xml"));
        doc2 = Factory.newDocument(new URL(TestDocument.getTestServerName() + "tests/iaa/beijing-opera.xml"));
        doc3 = Factory.newDocument(new URL(TestDocument.getTestServerName() + "tests/iaa/in-outlook-09-aug-2001.xml"));
        doc4 = Factory.newDocument(new URL(TestDocument.getTestServerName() + "tests/iaa/in-outlook-09-aug-2001.xml"));
    } catch (Exception e) {
        e.printStackTrace();
    }
    if (doc1 != null && doc2 != null && doc3 != null && doc4 != null) {
        // first document pair
        AnnotationSet as1 = doc1.getAnnotations("ann1");
        AnnotationSet as2 = doc2.getAnnotations("ann2");
        ClassificationMeasures myClassificationMeasures1 = new ClassificationMeasures();
        myClassificationMeasures1.calculateConfusionMatrix(as1, as2, type, feature, true);
        // expected value first, so that failure messages label the values correctly
        assertEquals(0.7777778f, myClassificationMeasures1.getObservedAgreement(), delta);
        assertEquals(0.6086957f, myClassificationMeasures1.getKappaCohen(), delta);
        assertEquals(0.59550565f, myClassificationMeasures1.getKappaPi(), delta);
        // second document pair
        AnnotationSet as3 = doc3.getAnnotations("ann1");
        AnnotationSet as4 = doc4.getAnnotations("ann2");
        ClassificationMeasures myClassificationMeasures2 = new ClassificationMeasures();
        myClassificationMeasures2.calculateConfusionMatrix(as3, as4, type, feature, true);
        assertEquals(0.96875f, myClassificationMeasures2.getObservedAgreement(), delta);
        assertEquals(0.3263158f, myClassificationMeasures2.getKappaCohen(), delta);
        assertEquals(0.3227513f, myClassificationMeasures2.getKappaPi(), delta);
        // measures built from both of the results above
        ArrayList<ClassificationMeasures> tablesList = new ArrayList<ClassificationMeasures>();
        tablesList.add(myClassificationMeasures1);
        tablesList.add(myClassificationMeasures2);
        ClassificationMeasures myNewClassificationMeasures = new ClassificationMeasures(tablesList);
        assertEquals(0.94520545f, myNewClassificationMeasures.getObservedAgreement(), delta);
        assertEquals(0.7784521f, myNewClassificationMeasures.getKappaCohen(), delta);
        assertEquals(0.7778622f, myNewClassificationMeasures.getKappaPi(), delta);
    } else {
        System.out.println("Failed to create docs from URLs.");
    }
}
Also used : ArrayList(java.util.ArrayList) AnnotationSet(gate.AnnotationSet) Document(gate.Document) TestDocument(gate.corpora.TestDocument) URL(java.net.URL)

Example 12 with AnnotationSet

use of gate.AnnotationSet in project gate-core by GateNLP.

the class DocumentStaxUtils method readGateXmlDocument.

/**
 * Populates a document from GATE XML supplied by a StAX reader, replacing
 * the document's content, annotation sets and features.
 *
 * On entry the reader must be positioned on the opening
 * {@code GateDocument} tag (the last event was a START_ELEMENT whose local
 * name is "GateDocument"). On normal return it is positioned on the
 * matching closing tag.
 *
 * The document features are only applied once every annotation set and
 * relation set has been read. If an {@link XMLStreamException} or a
 * {@link RuntimeException} is raised after the new text has been installed,
 * the previous content is put back before the exception is rethrown.
 *
 * @param xsr the XML source to read from
 * @param doc the document to fill in
 * @param statusListener receives progress messages; may be {@code null},
 *          in which case no messages are sent
 * @throws XMLStreamException if the XML does not have the expected
 *           structure or the underlying reader fails
 */
public static void readGateXmlDocument(XMLStreamReader xsr, Document doc, StatusListener statusListener) throws XMLStreamException {
    final boolean reporting = statusListener != null;

    // precondition: we must start on <GateDocument>
    xsr.require(XMLStreamConstants.START_ELEMENT, null, "GateDocument");

    // first child: the document features
    xsr.nextTag();
    xsr.require(XMLStreamConstants.START_ELEMENT, null, "GateDocumentFeatures");
    if (reporting) {
        statusListener.statusChanged("Reading document features");
    }
    final FeatureMap parsedFeatures = readFeatureMap(xsr);

    // second child: the text, which also yields the node ID -> offset map
    xsr.nextTag();
    xsr.require(XMLStreamConstants.START_ELEMENT, null, "TextWithNodes");
    final Map<Integer, Long> offsetsByNodeId = new HashMap<Integer, Long>();
    if (reporting) {
        statusListener.statusChanged("Reading document content");
    }
    final String text = readTextWithNodes(xsr, offsetsByNodeId);

    // keep the old content so it can be restored if parsing fails below
    final DocumentContent previousContent = doc.getContent();
    doc.setContent(new DocumentContentImpl(text));
    try {
        int processed = 0;
        // largest annotation/relation ID seen so far, if any
        Integer highestId = null;
        // null until it is known whether annotation IDs are required
        Boolean idsRequired = null;

        int event = xsr.nextTag();
        while (event == XMLStreamConstants.START_ELEMENT && xsr.getLocalName().equals("AnnotationSet")) {
            xsr.require(XMLStreamConstants.START_ELEMENT, null, "AnnotationSet");
            final String setName = xsr.getAttributeValue(null, "Name");
            // a missing Name attribute denotes the default annotation set
            if (reporting) {
                statusListener.statusChanged(setName == null
                        ? "Reading default annotation set"
                        : "Reading \"" + setName + "\" annotation set");
            }
            final AnnotationSet target = (setName == null) ? doc.getAnnotations() : doc.getAnnotations(setName);
            target.clear();

            final SortedSet<Integer> idsRead = new TreeSet<Integer>();
            idsRequired = readAnnotationSet(xsr, target, offsetsByNodeId, idsRead, idsRequired);
            if (!idsRead.isEmpty()) {
                final Integer top = idsRead.last();
                if (highestId == null || top.intValue() > highestId.intValue()) {
                    highestId = top;
                }
            }
            processed += idsRead.size();

            // the reader is now on </AnnotationSet>; the next tag is another
            // <AnnotationSet>, a <RelationSet>, or </GateDocument>
            event = xsr.nextTag();
        }

        while (event == XMLStreamConstants.START_ELEMENT && xsr.getLocalName().equals("RelationSet")) {
            xsr.require(XMLStreamConstants.START_ELEMENT, null, "RelationSet");
            final String ownerName = xsr.getAttributeValue(null, "Name");
            // relation sets are attached to the annotation set of the same name
            if (reporting) {
                statusListener.statusChanged(ownerName == null
                        ? "Reading relation set for default annotation set"
                        : "Reading relation set for \"" + ownerName + "\" annotation set");
            }
            final RelationSet target = (ownerName == null)
                    ? doc.getAnnotations().getRelations()
                    : doc.getAnnotations(ownerName).getRelations();

            final SortedSet<Integer> idsRead = new TreeSet<Integer>();
            readRelationSet(xsr, target, idsRead);
            if (!idsRead.isEmpty()) {
                final Integer top = idsRead.last();
                if (highestId == null || top.intValue() > highestId.intValue()) {
                    highestId = top;
                }
            }
            processed += idsRead.size();

            // the reader is expected to be on </RelationSet> here; the next
            // tag is another <RelationSet> or </GateDocument>
            event = xsr.nextTag();
        }

        // nothing else may follow: we must be on </GateDocument>
        xsr.require(XMLStreamConstants.END_ELEMENT, null, "GateDocument");
        doc.setFeatures(parsedFeatures);

        // move the ID generator past every ID just read (DocumentImpl only)
        if (highestId != null && doc instanceof DocumentImpl) {
            ((DocumentImpl) doc).setNextAnnotationId(highestId.intValue() + 1);
        }
        if (reporting) {
            statusListener.statusChanged("Finished.  " + processed + " annotation(s) processed");
        }
    } catch (XMLStreamException xse) {
        // parsing failed: put the previous content back
        doc.setContent(previousContent);
        throw xse;
    } catch (RuntimeException re) {
        // same recovery for unchecked failures
        doc.setContent(previousContent);
        throw re;
    }
}
Also used : HashMap(java.util.HashMap) AnnotationSet(gate.AnnotationSet) FeatureMap(gate.FeatureMap) GateRuntimeException(gate.util.GateRuntimeException) XMLStreamException(javax.xml.stream.XMLStreamException) DocumentContent(gate.DocumentContent) TreeSet(java.util.TreeSet) RelationSet(gate.relations.RelationSet)

Example 13 with AnnotationSet

use of gate.AnnotationSet in project gate-core by GateNLP.

the class AnnotationSetImpl method get.

// get(types)
/**
 * Selects annotations by type and by feature constraints.
 *
 * The result holds the annotations of the requested type whose feature map
 * subsumes {@code constraints}. An annotation may carry additional
 * features, but one that lacks any of the requested feature-value pairs is
 * left out.
 *
 * Caveat: if {@code constraints} includes the feature named by
 * gate.creole.ANNIEConstants.LOOKUP_CLASS_FEATURE_NAME (normally "class"),
 * matching of that feature goes through an ontology that GATE tries to
 * retrieve from a feature on both the annotation and the constraints. When
 * the two ontologies are not identical, or either side has none, the match
 * fails and the annotation is not included. In short, this method does not
 * behave normally for features with the name "class".
 *
 * @param type
 *          the annotation type (name) to select
 * @param constraints
 *          feature-value pairs every returned annotation must have
 * @return {@code null} if the lookup by type yields {@code null}; the
 *         empty annotation set if no annotation of that type satisfies the
 *         constraints; otherwise an immutable set of the matching
 *         annotations
 */
@Override
public AnnotationSet get(String type, FeatureMap constraints) {
    if (annotsByType == null) {
        indexByType();
    }
    final AnnotationSet sameType = get(type);
    if (sameType == null) {
        return null;
    }
    // keep only the annotations whose features cover all the constraints
    final List<Annotation> matches = new ArrayList<Annotation>();
    for (Annotation candidate : sameType) {
        if (candidate.getFeatures().subsumes(constraints)) {
            matches.add(candidate);
        }
    }
    return matches.isEmpty() ? emptyAS() : new ImmutableAnnotationSetImpl(doc, matches);
}
Also used : ArrayList(java.util.ArrayList) AnnotationSet(gate.AnnotationSet) Annotation(gate.Annotation)

Example 14 with AnnotationSet

use of gate.AnnotationSet in project gate-core by GateNLP.

the class AnnotationSetImpl method get.

// get(type, constraints)
/**
 * Selects annotations by type and by the names of the features they carry.
 *
 * @param type
 *          the annotation type to select, or {@code null} to consider
 *          every annotation in this set regardless of type
 * @param featureNames
 *          feature names that must all be present on a returned annotation
 * @return {@code null} if a type was given and the lookup by type yields
 *         {@code null}; the empty annotation set if no candidate carries
 *         all the feature names; otherwise an immutable set of the
 *         matching annotations
 */
@Override
public AnnotationSet get(String type, Set<? extends Object> featureNames) {
    if (annotsByType == null) {
        indexByType();
    }
    // decide what to scan: one type's annotations, or everything
    final Iterable<Annotation> candidates;
    if (type == null) {
        candidates = annotsById.values();
    } else {
        final AnnotationSet sameType = get(type);
        if (sameType == null) {
            // nothing of this type, so there is nothing to filter
            return null;
        }
        candidates = sameType;
    }
    // keep the annotations that have every requested feature name
    final List<Annotation> matches = new ArrayList<Annotation>();
    for (Annotation candidate : candidates) {
        if (candidate.getFeatures().keySet().containsAll(featureNames)) {
            matches.add(candidate);
        }
    }
    return matches.isEmpty() ? emptyAS() : new ImmutableAnnotationSetImpl(doc, matches);
}
Also used : ArrayList(java.util.ArrayList) AnnotationSet(gate.AnnotationSet) Annotation(gate.Annotation)

Example 15 with AnnotationSet

use of gate.AnnotationSet in project gate-core by GateNLP.

the class AnnotationSetImpl method addToTypeIndex.

// indexByStartOffset()
/**
 * Registers an annotation in the by-type index, creating the entry for its
 * type on first use. When the index has not been built this is a no-op.
 *
 * @param a the annotation to index
 */
void addToTypeIndex(Annotation a) {
    if (annotsByType != null) {
        final String annotType = a.getType();
        AnnotationSet bucket = annotsByType.get(annotType);
        if (bucket == null) {
            // first annotation of this type: start a new set for it
            bucket = new AnnotationSetImpl(doc);
            annotsByType.put(annotType, bucket);
        }
        bucket.add(a);
    }
}
Also used : AnnotationSet(gate.AnnotationSet)

Aggregations

AnnotationSet (gate.AnnotationSet)43 Annotation (gate.Annotation)27 ArrayList (java.util.ArrayList)14 HashMap (java.util.HashMap)11 HashSet (java.util.HashSet)11 Document (gate.Document)9 List (java.util.List)8 FeatureMap (gate.FeatureMap)7 InvalidOffsetException (gate.util.InvalidOffsetException)6 AnnotationSetImpl (gate.annotation.AnnotationSetImpl)5 Set (java.util.Set)5 StatusListener (gate.event.StatusListener)4 GateRuntimeException (gate.util.GateRuntimeException)4 Point (java.awt.Point)4 IOException (java.io.IOException)4 URL (java.net.URL)4 Map (java.util.Map)4 Color (java.awt.Color)3 TreeSet (java.util.TreeSet)3 TestDocument (gate.corpora.TestDocument)2