use of gate.AnnotationSet in project gate-core by GateNLP.
the class AnnotationSetImpl method get.
// get(type)
/**
 * Collects every annotation whose type is one of the requested types.
 * <p>
 * The per-type index is built on demand (via {@code indexByType()}) when it
 * has not been created yet. Types that have no entry in the index are
 * skipped.
 *
 * @param types the annotation type names to select
 * @return the result of {@code emptyAS()} when nothing matched, otherwise a
 *         new {@code ImmutableAnnotationSetImpl} holding the matches
 */
@Override
public AnnotationSet get(Set<String> types) throws ClassCastException {
  // Build the type index lazily, the first time it is needed.
  if (annotsByType == null) {
    indexByType();
  }
  List<Annotation> matches = new ArrayList<Annotation>();
  for (String typeName : types) {
    AnnotationSet ofType = annotsByType.get(typeName);
    if (ofType == null) {
      // No annotations are indexed under this type.
      continue;
    }
    for (Annotation annotation : ofType) {
      matches.add(annotation);
    }
  }
  if (!matches.isEmpty()) {
    return new ImmutableAnnotationSetImpl(doc, matches);
  }
  return emptyAS();
}
use of gate.AnnotationSet in project gate-core by GateNLP.
the class CorpusAnnotationDiff method doDiff.
// printStructure
/**
* Core of the annotation diff: pairs key annotations with response
* annotations, registers one DiffSetElement per outcome through
* addToDiffset, and then recomputes the precision, recall, false-positive
* and F-measure fields from the typeCounter totals.
* <p>
* Both lists are consumed: every element is removed through its iterator as
* it is processed, so both lists are empty when the method completes and
* their iterators must support {@code remove()}.
* @param aKeyAnnotList a list containing the annotations from key. If this
* param is <b>null</b> the method returns immediately and does nothing.
* @param aResponseAnnotList a list containing the annotations from response.
* If this param is <b>null</b> the method returns immediately and does
* nothing.
*/
protected void doDiff(List<Annotation> aKeyAnnotList, List<Annotation> aResponseAnnotList) {
// If one of the annotation lists is null there is no point in doing the diff.
if (aKeyAnnotList == null || aResponseAnnotList == null)
return;
// Iterate through all elements of the key list and look for a response
// annotation that is partially compatible and/or coextensive with each key.
Iterator<Annotation> keyIterator = aKeyAnnotList.iterator();
while (keyIterator.hasNext()) {
Annotation keyAnnot = keyIterator.next();
Iterator<Annotation> responseIterator = aResponseAnnotList.iterator();
// Stays null until this key has been paired with a response annotation.
DiffSetElement diffElement = null;
while (responseIterator.hasNext()) {
Annotation responseAnnot = responseIterator.next();
if (keyAnnot.isPartiallyCompatible(responseAnnot, keyFeatureNamesSet)) {
// Remember both sides as partial matches; this happens whether or not
// they also turn out to be coextensive below.
keyPartiallySet.add(keyAnnot);
responsePartiallySet.add(responseAnnot);
if (keyAnnot.coextensive(responseAnnot)) {
// Partially compatible and coextensive: record the pair as
// DEFAULT_TYPE (key) / CORRECT_TYPE (response).
diffElement = new DiffSetElement(keyAnnot, responseAnnot, DEFAULT_TYPE, CORRECT_TYPE, keyDocument, responseDocument);
// Add this element to the DiffSet
addToDiffset(diffElement);
}
// End if (keyAnnot.coextensive(responseAnnot))
} else if (keyAnnot.coextensive(responseAnnot)) {
// Coextensive but not partially compatible: the pair is still
// recorded, with the types decided by detectKeyType and
// detectResponseType.
diffElement = new DiffSetElement(keyAnnot, responseAnnot, detectKeyType(keyAnnot), detectResponseType(responseAnnot), keyDocument, responseDocument);
// Add this element to the DiffSet
addToDiffset(diffElement);
}
if (diffElement != null) {
// A pair was recorded: remove the response annotation from the list so
// it cannot be paired again, and stop scanning for this key.
responseIterator.remove();
break;
}
// End if
}
// diffElement != null here means the inner loop ended through the break,
// i.e. the key has already been recorded as part of a pair.
if (diffElement == null) {
if (keyPartiallySet.contains(keyAnnot))
// The key was partially compatible with some response but coextensive
// with none: record it alone as DEFAULT_TYPE.
diffElement = new DiffSetElement(keyAnnot, null, DEFAULT_TYPE, NULL_TYPE, keyDocument, responseDocument);
else {
// If keyAnnot is not in keyPartiallySet then it has to be checked
// against all elements already in diffSet to see if there is a
// response annotation recorded earlier which is partially compatible
// with keyAnnot.
Iterator<DiffSetElement> respParIter = diffSet.iterator();
while (respParIter.hasNext()) {
DiffSetElement diffElem = respParIter.next();
Annotation respAnnot = diffElem.getRightAnnotation();
if (respAnnot != null && keyAnnot.isPartiallyCompatible(respAnnot, keyFeatureNamesSet)) {
diffElement = new DiffSetElement(keyAnnot, null, DEFAULT_TYPE, NULL_TYPE, keyDocument, responseDocument);
break;
}
// End if
}
// If it is still null then the key annotation is missing from the
// response.
if (diffElement == null)
diffElement = new DiffSetElement(keyAnnot, null, MISSING_TYPE, NULL_TYPE, keyDocument, responseDocument);
}
// End if
addToDiffset(diffElement);
}
// End if
// The key has been fully handled: drop it from the key list.
keyIterator.remove();
}
// end while keyIterator
// Whatever is left in the response list was not paired with any key.
// Record it as PARTIALLY_CORRECT if it was partially compatible with some
// key, otherwise as SPURIOUS, and remove it from the list.
DiffSetElement diffElem = null;
Iterator<Annotation> responseIter = aResponseAnnotList.iterator();
while (responseIter.hasNext()) {
Annotation respAnnot = responseIter.next();
if (responsePartiallySet.contains(respAnnot))
diffElem = new DiffSetElement(null, respAnnot, NULL_TYPE, PARTIALLY_CORRECT_TYPE, keyDocument, responseDocument);
else
diffElem = new DiffSetElement(null, respAnnot, NULL_TYPE, SPURIOUS_TYPE, keyDocument, responseDocument);
addToDiffset(diffElem);
responseIter.remove();
}
// End while
// CALCULATE ALL (NLP) MEASURES like:
// Precision, Recall, FalsePositive and F-Measure
// NOTE(review): typeCounter is presumably maintained by addToDiffset, which
// is not visible here -- confirm.
// possible = CORRECT + PARTIALLY_CORRECT + MISSING
int possible =
// this comes from Key or Resp
typeCounter[CORRECT_TYPE] +
// this comes from Resp
typeCounter[PARTIALLY_CORRECT_TYPE] +
// this comes from Key
typeCounter[MISSING_TYPE];
// actual = CORRECT + PARTIALLY_CORRECT + SPURIOUS
int actual =
// this comes from Key or Resp
typeCounter[CORRECT_TYPE] +
// this comes from Resp
typeCounter[PARTIALLY_CORRECT_TYPE] +
// this comes from Resp
typeCounter[SPURIOUS_TYPE];
/*
if (actual != responseSize)
Err.prln("AnnotDiff warning: The response size(" + responseSize +
") is not the same as the computed value of" +
" actual(Correct[resp or key]+Partial[resp]+Spurious[resp]=" + actual +")");
*/
// The precision fields are only assigned when actual is non-zero (avoiding
// a division by zero); otherwise they keep their current values.
if (actual != 0) {
precisionStrict = ((double) typeCounter[CORRECT_TYPE]) / ((double) actual);
precisionLenient = ((double) (typeCounter[CORRECT_TYPE] + typeCounter[PARTIALLY_CORRECT_TYPE])) / ((double) actual);
precisionAverage = (precisionStrict + precisionLenient) / 2;
}
// End if
// Same guard for recall, with possible as the denominator.
if (possible != 0) {
recallStrict = ((double) typeCounter[CORRECT_TYPE]) / ((double) possible);
recallLenient = ((double) (typeCounter[CORRECT_TYPE] + typeCounter[PARTIALLY_CORRECT_TYPE])) / ((double) possible);
recallAverage = (recallStrict + recallLenient) / 2;
}
// End if
// no = number of response-document annotations of type
// annotationTypeForFalsePositive; it stays 0 when that type is not set.
int no = 0;
// Note: this brace-less if governs the whole if/else statement that
// follows it.
if (annotationTypeForFalsePositive != null)
// Was it the default set ?
if (responseAnnotationSetNameFalsePoz == null) {
AnnotationSet aSet = responseDocument.getAnnotations().get(annotationTypeForFalsePositive);
no = aSet == null ? 0 : aSet.size();
} else {
AnnotationSet aSet = responseDocument.getAnnotations(responseAnnotationSetNameFalsePoz).get(annotationTypeForFalsePositive);
no = aSet == null ? 0 : aSet.size();
}
if (no != 0) {
// No error here: the formula is the opposite of recall or precision, so
// the strict figure also counts the partially correct annotations.
falsePositiveStrict = ((double) (typeCounter[SPURIOUS_TYPE] + typeCounter[PARTIALLY_CORRECT_TYPE])) / ((double) no);
falsePositiveLenient = ((double) typeCounter[SPURIOUS_TYPE]) / ((double) no);
falsePositiveAverage = (falsePositiveStrict + falsePositiveLenient) / 2;
}
// End if
// Calculate F-Measure Strict: (P * R) / (weight * (P + R)), or 0.0 when the
// denominator is zero.
double denominator = weight * (precisionStrict + recallStrict);
if (denominator != 0)
fMeasureStrict = (precisionStrict * recallStrict) / denominator;
else
fMeasureStrict = 0.0;
// Calculate F-Measure Lenient with the same formula on the lenient figures.
denominator = weight * (precisionLenient + recallLenient);
if (denominator != 0)
fMeasureLenient = (precisionLenient * recallLenient) / denominator;
else
fMeasureLenient = 0.0;
// Calculate F-Measure Average as the mean of the strict and lenient values.
fMeasureAverage = (fMeasureStrict + fMeasureLenient) / 2;
}
use of gate.AnnotationSet in project gate-core by GateNLP.
the class TestXml method testUnpackMarkup.
// runCompleteTestWithAFormat
/**
 * Loads {@code tests/xml/xces.xml} from the test server and checks that the
 * original-markups annotation set holds exactly 758 annotations, then runs
 * the node-ID consistency and annotation-ID generator checks on the
 * document.
 *
 * @throws Exception if the document cannot be created or a check fails
 */
public void testUnpackMarkup() throws Exception {
  gate.Document doc = gate.Factory.newDocument(
      new URL(TestDocument.getTestServerName() + "tests/xml/xces.xml"), workingEncoding);

  AnnotationSet annotSet = doc.getAnnotations(GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
  assertEquals("For " + doc.getSourceUrl() + " the number of annotations should be:758",
      758, annotSet.size());

  gate.corpora.TestDocument.verifyNodeIdConsistency(doc);
  // Verifies that the maximum annotation ID on the GATE doc is less than the
  // annotation ID generator of the document.
  verifyAnnotationIDGenerator(doc);
}
use of gate.AnnotationSet in project gate-core by GateNLP.
the class TestXml method buildID2AnnotMap.
/**
 * Builds a map from annotation ID to annotation covering the default
 * annotation set and every named annotation set of the given document.
 * <p>
 * Each set is handed to {@code addAnnotSet2Map}, which fills the map.
 * NOTE(review): duplicate-ID detection is presumably done inside
 * {@code addAnnotSet2Map}; it is not visible here.
 *
 * @param aDoc the GATE document to be scanned
 * @return the ID-to-annotation map
 */
private Map<Integer, Annotation> buildID2AnnotMap(Document aDoc) {
  Map<Integer, Annotation> annotationsById = new HashMap<Integer, Annotation>();

  // The default annotation set goes in first.
  AnnotationSet defaultSet = aDoc.getAnnotations();
  addAnnotSet2Map(defaultSet, annotationsById);

  // Without named sets there is nothing more to scan.
  if (aDoc.getNamedAnnotationSets() == null) {
    return annotationsById;
  }

  for (AnnotationSet namedSet : aDoc.getNamedAnnotationSets().values()) {
    addAnnotSet2Map(namedSet, annotationsById);
  }
  return annotationsById;
}
use of gate.AnnotationSet in project gate-core by GateNLP.
the class TestSgml method testSgmlLoading.
// setUp
/**
 * Loads {@code tests/sgml/Hds.sgm} as a non-markup-aware document, unpacks
 * its markup through the document format returned for it (expected to be an
 * SgmlDocumentFormat) and checks that the original-markups annotation set
 * holds exactly 1022 annotations.
 */
public void testSgmlLoading() throws Exception {
  assertTrue(true);

  // Create the document with markup awareness switched off, so the markup is
  // unpacked explicitly below.
  FeatureMap creationParams = Factory.newFeatureMap();
  creationParams.put(Document.DOCUMENT_URL_PARAMETER_NAME,
      new URL(TestDocument.getTestServerName() + "tests/sgml/Hds.sgm"));
  creationParams.put(Document.DOCUMENT_MARKUP_AWARE_PARAMETER_NAME, "false");
  gate.Document doc = (Document) Factory.createResource("gate.corpora.DocumentImpl", creationParams);

  // Get the document format that deals with this document.
  gate.DocumentFormat sgmlFormat = gate.DocumentFormat.getDocumentFormat(doc, doc.getSourceUrl());
  assertTrue("Bad document Format was produced. SgmlDocumentFormat was expected",
      sgmlFormat instanceof gate.corpora.SgmlDocumentFormat);

  // No element-name mapping is supplied: the map is deliberately null.
  Map<String, String> markupElementsMap = null;
  sgmlFormat.setMarkupElementsMap(markupElementsMap);
  sgmlFormat.unpackMarkup(doc, "DocumentContent");

  AnnotationSet originalMarkups = doc.getAnnotations(GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
  assertEquals("For " + doc.getSourceUrl() + " the number of annotations should be:1022",
      1022, originalMarkups.size());

  // Verify that the node IDs of the document are consistent.
  gate.corpora.TestDocument.verifyNodeIdConsistency(doc);
}
Aggregations