use of gate.AnnotationSet in project gate-core by GateNLP.
the class TestClassificationMeasures method test.
/**
 * Checks the agreement figures reported by ClassificationMeasures for two
 * test documents and for the measures object built from both of them.
 *
 * Each document is loaded twice from the test server; the "ann1" annotation
 * set of one copy is compared with the "ann2" annotation set of the other
 * copy, using annotation type "sent" and feature "Op". For each comparison
 * the observed agreement, Cohen's kappa and the pi kappa are compared with
 * fixed expected values. The two measures objects are then combined through
 * the list constructor and the combined figures are checked as well.
 *
 * NOTE(review): if any document cannot be loaded the stack trace is printed,
 * a message is written to standard output and no assertion is executed, so
 * the test passes without checking anything - confirm this is intended.
 */
public void test() {
  String type = "sent";
  String feature = "Op";
  Document doc1 = null;
  Document doc2 = null;
  Document doc3 = null;
  Document doc4 = null;
  try {
    Gate.init();
    doc1 = Factory.newDocument(new URL(TestDocument.getTestServerName() + "tests/iaa/beijing-opera.xml"));
    doc2 = Factory.newDocument(new URL(TestDocument.getTestServerName() + "tests/iaa/beijing-opera.xml"));
    doc3 = Factory.newDocument(new URL(TestDocument.getTestServerName() + "tests/iaa/in-outlook-09-aug-2001.xml"));
    doc4 = Factory.newDocument(new URL(TestDocument.getTestServerName() + "tests/iaa/in-outlook-09-aug-2001.xml"));
  } catch (Exception e) {
    e.printStackTrace();
  }
  if (doc1 != null && doc2 != null && doc3 != null && doc4 != null) {
    // first document: annotator set "ann1" against annotator set "ann2"
    AnnotationSet as1 = doc1.getAnnotations("ann1");
    AnnotationSet as2 = doc2.getAnnotations("ann2");
    ClassificationMeasures myClassificationMeasures1 = new ClassificationMeasures();
    // NOTE(review): the meaning of the trailing boolean flag is not visible here
    myClassificationMeasures1.calculateConfusionMatrix(as1, as2, type, feature, true);
    // assertEquals takes (expected, actual): the literal goes first so that a
    // failure message reports the computed value as the actual one
    assertEquals(0.7777778f, myClassificationMeasures1.getObservedAgreement());
    assertEquals(0.6086957f, myClassificationMeasures1.getKappaCohen());
    assertEquals(0.59550565f, myClassificationMeasures1.getKappaPi());

    // second document: same comparison
    AnnotationSet as3 = doc3.getAnnotations("ann1");
    AnnotationSet as4 = doc4.getAnnotations("ann2");
    ClassificationMeasures myClassificationMeasures2 = new ClassificationMeasures();
    myClassificationMeasures2.calculateConfusionMatrix(as3, as4, type, feature, true);
    assertEquals(0.96875f, myClassificationMeasures2.getObservedAgreement());
    assertEquals(0.3263158f, myClassificationMeasures2.getKappaCohen());
    assertEquals(0.3227513f, myClassificationMeasures2.getKappaPi());

    // measures object built from both of the above
    ArrayList<ClassificationMeasures> tablesList = new ArrayList<ClassificationMeasures>();
    tablesList.add(myClassificationMeasures1);
    tablesList.add(myClassificationMeasures2);
    ClassificationMeasures myNewClassificationMeasures = new ClassificationMeasures(tablesList);
    assertEquals(0.94520545f, myNewClassificationMeasures.getObservedAgreement());
    assertEquals(0.7784521f, myNewClassificationMeasures.getKappaCohen());
    assertEquals(0.7778622f, myNewClassificationMeasures.getKappaPi());
  } else {
    System.out.println("Failed to create docs from URLs.");
  }
}
use of gate.AnnotationSet in project gate-core by GateNLP.
the class DocumentStaxUtils method readGateXmlDocument.
/**
 * Loads GATE XML format data from a StAX reader into an existing document,
 * replacing the document's current content, annotation sets and features.
 *
 * On entry the reader must be positioned on the opening GateDocument tag
 * (the last event was a START_ELEMENT whose local name is "GateDocument").
 * On normal return it is positioned on the matching closing tag.
 *
 * If an XMLStreamException or a RuntimeException is raised after the new
 * text has been installed, the document's previous content is put back
 * before the exception is rethrown.
 *
 * @param xsr the XML stream to read from
 * @param doc the document to populate
 * @param statusListener receives progress messages; may be null, in which
 *          case no messages are sent
 * @throws XMLStreamException if the reader reports an error or the XML
 *           does not have the expected element structure
 */
public static void readGateXmlDocument(XMLStreamReader xsr, Document doc, StatusListener statusListener) throws XMLStreamException {
  final boolean reportStatus = (statusListener != null);

  // precondition: we must be sitting on <GateDocument>
  xsr.require(XMLStreamConstants.START_ELEMENT, null, "GateDocument");

  // --- document features ---
  xsr.nextTag();
  xsr.require(XMLStreamConstants.START_ELEMENT, null, "GateDocumentFeatures");
  if (reportStatus) {
    statusListener.statusChanged("Reading document features");
  }
  FeatureMap features = readFeatureMap(xsr);

  // --- document text; the map collects node ID -> offset for later use ---
  xsr.nextTag();
  xsr.require(XMLStreamConstants.START_ELEMENT, null, "TextWithNodes");
  Map<Integer, Long> offsetsByNodeId = new HashMap<Integer, Long>();
  if (reportStatus) {
    statusListener.statusChanged("Reading document content");
  }
  String text = readTextWithNodes(xsr, offsetsByNodeId);

  // remember the existing content so it can be restored on failure
  DocumentContent originalContent = doc.getContent();
  doc.setContent(new DocumentContentImpl(text));

  try {
    int processed = 0;
    // largest annotation/relation ID seen so far, null until one is seen
    Integer highestId = null;
    // null until the first annotation set has been read
    Boolean idsRequired = null;

    int event = xsr.nextTag();

    // --- zero or more <AnnotationSet> elements ---
    while (event == XMLStreamConstants.START_ELEMENT && xsr.getLocalName().equals("AnnotationSet")) {
      xsr.require(XMLStreamConstants.START_ELEMENT, null, "AnnotationSet");
      String setName = xsr.getAttributeValue(null, "Name");
      AnnotationSet target;
      if (setName != null) {
        if (reportStatus) {
          statusListener.statusChanged("Reading \"" + setName + "\" annotation set");
        }
        target = doc.getAnnotations(setName);
      } else {
        // no Name attribute means the default annotation set
        if (reportStatus) {
          statusListener.statusChanged("Reading default annotation set");
        }
        target = doc.getAnnotations();
      }
      target.clear();

      SortedSet<Integer> idsRead = new TreeSet<Integer>();
      idsRequired = readAnnotationSet(xsr, target, offsetsByNodeId, idsRead, idsRequired);
      if (!idsRead.isEmpty()) {
        Integer largest = idsRead.last();
        if (highestId == null || largest.intValue() > highestId.intValue()) {
          highestId = largest;
        }
      }
      processed += idsRead.size();

      // advance to the next <AnnotationSet>, a <RelationSet>, or
      // </GateDocument>
      event = xsr.nextTag();
    }

    // --- zero or more <RelationSet> elements ---
    while (event == XMLStreamConstants.START_ELEMENT && xsr.getLocalName().equals("RelationSet")) {
      xsr.require(XMLStreamConstants.START_ELEMENT, null, "RelationSet");
      String ownerSetName = xsr.getAttributeValue(null, "Name");
      RelationSet target;
      if (ownerSetName != null) {
        if (reportStatus) {
          statusListener.statusChanged("Reading relation set for \"" + ownerSetName + "\" annotation set");
        }
        target = doc.getAnnotations(ownerSetName).getRelations();
      } else {
        if (reportStatus) {
          statusListener.statusChanged("Reading relation set for default annotation set");
        }
        target = doc.getAnnotations().getRelations();
      }

      SortedSet<Integer> idsRead = new TreeSet<Integer>();
      readRelationSet(xsr, target, idsRead);
      // relation IDs are folded into the same running maximum as
      // annotation IDs
      if (!idsRead.isEmpty()) {
        Integer largest = idsRead.last();
        if (highestId == null || largest.intValue() > highestId.intValue()) {
          highestId = largest;
        }
      }
      processed += idsRead.size();

      // advance to the next <RelationSet> or to </GateDocument>
      event = xsr.nextTag();
    }

    // anything other than </GateDocument> here is a structural error
    xsr.require(XMLStreamConstants.END_ELEMENT, null, "GateDocument");

    doc.setFeatures(features);

    // for DocumentImpl, make the next generated ID follow the largest one read
    if (doc instanceof DocumentImpl && highestId != null) {
      ((DocumentImpl) doc).setNextAnnotationId(highestId.intValue() + 1);
    }

    if (reportStatus) {
      statusListener.statusChanged("Finished. " + processed + " annotation(s) processed");
    }
  } catch (XMLStreamException xse) {
    // roll back to the content the document had before parsing
    doc.setContent(originalContent);
    throw xse;
  } catch (RuntimeException re) {
    doc.setContent(originalContent);
    throw re;
  }
}
use of gate.AnnotationSet in project gate-core by GateNLP.
the class AnnotationSetImpl method get.
// get(types)
/**
 * Selects annotations by type and by feature values.
 *
 * The result contains the annotations of the given type whose feature map
 * subsumes <code>constraints</code>, i.e. annotations that carry every
 * feature-value pair listed in <code>constraints</code>. Annotations may
 * have additional features; annotations missing any of the requested
 * pairs are left out.
 *
 * Special case: if <code>constraints</code> contains the feature named by
 * gate.creole.ANNIEConstants.LOOKUP_CLASS_FEATURE_NAME (normally "class"),
 * GATE attempts to match that feature using an ontology, which it tries to
 * retrieve from a feature on both the annotation and the constraints. If
 * the two ontologies are not identical, or if either side has none, the
 * match fails and the annotation is not included. In short, this method
 * does not behave in the usual way for features named "class".
 *
 * @param type
 *          the annotation type (name) to select
 * @param constraints
 *          the feature-value pairs an annotation must have to be returned
 * @return <code>null</code> if the lookup by type returns
 *         <code>null</code>; an empty annotation set if no annotation of
 *         that type satisfies the constraints; otherwise an immutable
 *         annotation set holding the matching annotations
 */
@Override
public AnnotationSet get(String type, FeatureMap constraints) {
  if (annotsByType == null) {
    indexByType();
  }

  AnnotationSet ofRequestedType = get(type);
  if (ofRequestedType == null) {
    return null;
  }

  // keep only the annotations whose features cover every constraint
  List<Annotation> matches = new ArrayList<Annotation>();
  for (Annotation candidate : ofRequestedType) {
    if (candidate.getFeatures().subsumes(constraints)) {
      matches.add(candidate);
    }
  }

  if (matches.isEmpty()) {
    return emptyAS();
  }
  return new ImmutableAnnotationSetImpl(doc, matches);
}
use of gate.AnnotationSet in project gate-core by GateNLP.
the class AnnotationSetImpl method get.
// get(type, constraints)
/**
 * Selects annotations by type and by the names of the features they carry.
 *
 * An annotation is selected when the key set of its feature map contains
 * every element of <code>featureNames</code>.
 *
 * @param type
 *          the annotation type to restrict the search to; if
 *          <code>null</code>, every annotation in this set is considered
 * @param featureNames
 *          the feature names an annotation must have to be returned
 * @return <code>null</code> if a type was given and the lookup by type
 *         returns <code>null</code>; an empty annotation set if nothing
 *         matches; otherwise an immutable annotation set holding the
 *         matching annotations
 */
@Override
public AnnotationSet get(String type, Set<? extends Object> featureNames) {
  if (annotsByType == null) {
    indexByType();
  }

  // choose what to scan: one type's annotations, or everything
  Iterator<Annotation> candidates;
  if (type == null) {
    candidates = annotsById.values().iterator();
  } else {
    AnnotationSet ofRequestedType = get(type);
    if (ofRequestedType == null) {
      // nothing of that type, so there is nothing to filter
      return null;
    }
    candidates = ofRequestedType.iterator();
  }

  List<Annotation> matches = new ArrayList<Annotation>();
  for (; candidates.hasNext();) {
    Annotation candidate = candidates.next();
    // the annotation must carry every requested feature name
    if (candidate.getFeatures().keySet().containsAll(featureNames)) {
      matches.add(candidate);
    }
  }

  if (matches.isEmpty()) {
    return emptyAS();
  }
  return new ImmutableAnnotationSetImpl(doc, matches);
}
use of gate.AnnotationSet in project gate-core by GateNLP.
the class AnnotationSetImpl method addToTypeIndex.
// indexByStartOffset()
/**
 * Registers an annotation in the by-type index. If the index has not been
 * built (it is <code>null</code>) this method does nothing.
 *
 * @param a the annotation to index under its type
 */
void addToTypeIndex(Annotation a) {
  if (annotsByType != null) {
    final String annotationType = a.getType();
    AnnotationSet bucket = annotsByType.get(annotationType);
    if (bucket == null) {
      // first annotation of this type: create its set and register it
      bucket = new AnnotationSetImpl(doc);
      annotsByType.put(annotationType, bucket);
    }
    bucket.add(a);
  }
}
Aggregations