Search in sources :

Example 26 with AnnotationSet

use of gate.AnnotationSet in project gate-core by GateNLP.

the class AnnotationSetImpl method get.

// get(type)
/**
 * Gathers every annotation whose type name appears in the supplied set.
 * The per-type index is built on demand the first time it is needed.
 *
 * @param types the annotation type names to select; iterated in the set's
 *        own iteration order
 * @return an {@code ImmutableAnnotationSetImpl} over the matching
 *         annotations, or the result of {@code emptyAS()} when no annotation
 *         matched any of the requested types
 */
@Override
public AnnotationSet get(Set<String> types) throws ClassCastException {
    // Lazily create the type index before the first lookup.
    if (annotsByType == null) {
        indexByType();
    }
    List<Annotation> matches = new ArrayList<Annotation>();
    for (String typeName : types) {
        AnnotationSet ofType = annotsByType.get(typeName);
        if (ofType == null) {
            // No annotations are indexed under this type name.
            continue;
        }
        for (Annotation annotation : ofType) {
            matches.add(annotation);
        }
    }
    if (matches.isEmpty()) {
        return emptyAS();
    }
    return new ImmutableAnnotationSetImpl(doc, matches);
}
Also used : ArrayList(java.util.ArrayList) AnnotationSet(gate.AnnotationSet) Annotation(gate.Annotation)

Example 27 with AnnotationSet

use of gate.AnnotationSet in project gate-core by GateNLP.

the class CorpusAnnotationDiff method doDiff.

// printStructure
/**
 * Core of the annotation diff: pairs key annotations with response
 * annotations, registers one DiffSetElement per outcome through
 * addToDiffset(), and then recomputes the precision, recall, false-positive
 * and F-measure fields from typeCounter.
 * <p>
 * Side effect on the arguments: every element is removed from both lists
 * through Iterator.remove() while they are processed, so both lists are empty
 * when the method completes normally and their iterators must support
 * removal.
 * <p>
 * NOTE(review): typeCounter is only read here; it is presumably updated by
 * addToDiffset(), which is not visible in this excerpt - confirm.
 *
 * @param aKeyAnnotList a list containing the annotations from key. If this
 * param is <b>null</b> the method returns immediately without doing anything.
 * @param aResponseAnnotList a list containing the annotations from response.
 * If this param is <b>null</b> the method returns immediately.
 */
protected void doDiff(List<Annotation> aKeyAnnotList, List<Annotation> aResponseAnnotList) {
    // If either annotation list is null there is no point in doing the diff.
    if (aKeyAnnotList == null || aResponseAnnotList == null)
        return;
    // Iterate through all elements of the key list and look in the response
    // list for annotations satisfying isPartiallyCompatible() and/or
    // coextensive() with the current key.
    Iterator<Annotation> keyIterator = aKeyAnnotList.iterator();
    while (keyIterator.hasNext()) {
        Annotation keyAnnot = keyIterator.next();
        Iterator<Annotation> responseIterator = aResponseAnnotList.iterator();
        // Stays null until this key has been paired with a response.
        DiffSetElement diffElement = null;
        while (responseIterator.hasNext()) {
            Annotation responseAnnot = responseIterator.next();
            if (keyAnnot.isPartiallyCompatible(responseAnnot, keyFeatureNamesSet)) {
                // Remember both sides of the partial match; these sets are
                // consulted later when classifying unpaired annotations.
                keyPartiallySet.add(keyAnnot);
                responsePartiallySet.add(responseAnnot);
                if (keyAnnot.coextensive(responseAnnot)) {
                    // Partially compatible and coextensive: recorded with
                    // DEFAULT_TYPE / CORRECT_TYPE.
                    // Create a new DiffSetElement and add it to the diffSet
                    diffElement = new DiffSetElement(keyAnnot, responseAnnot, DEFAULT_TYPE, CORRECT_TYPE, keyDocument, responseDocument);
                    // Add this element to the DiffSet
                    addToDiffset(diffElement);
                }
            // End if (keyAnnot.coextensive(responseAnnot))
            } else if (keyAnnot.coextensive(responseAnnot)) {
                // Found two aligned (coextensive) annotations that are not
                // partially compatible. Their types are decided by
                // detectKeyType() / detectResponseType().
                // Create a new DiffSetElement and add it to the diffSet
                diffElement = new DiffSetElement(keyAnnot, responseAnnot, detectKeyType(keyAnnot), detectResponseType(responseAnnot), keyDocument, responseDocument);
                // Add this element to the DiffSet
                addToDiffset(diffElement);
            }
            if (diffElement != null) {
                // The key was paired with this response: remove the response
                // annotation from the list so it cannot be paired again, and
                // stop scanning responses for this key.
                responseIterator.remove();
                break;
            }
        // End if
        }
        // If diffElement != null it means that break was used (a pair was found)
        if (diffElement == null) {
            if (keyPartiallySet.contains(keyAnnot))
                diffElement = new DiffSetElement(keyAnnot, null, DEFAULT_TYPE, NULL_TYPE, keyDocument, responseDocument);
            else {
                // If keyAnnot is not in keyPartiallySet then it has to be checked
                // against all elements already in diffSet to see if there is
                // a previously recorded response annotation which is partially
                // compatible with the keyAnnot
                Iterator<DiffSetElement> respParIter = diffSet.iterator();
                while (respParIter.hasNext()) {
                    DiffSetElement diffElem = respParIter.next();
                    Annotation respAnnot = diffElem.getRightAnnotation();
                    if (respAnnot != null && keyAnnot.isPartiallyCompatible(respAnnot, keyFeatureNamesSet)) {
                        diffElement = new DiffSetElement(keyAnnot, null, DEFAULT_TYPE, NULL_TYPE, keyDocument, responseDocument);
                        break;
                    }
                // End if
                }
                // If it is still null then the key annotation is missing
                if (diffElement == null)
                    diffElement = new DiffSetElement(keyAnnot, null, MISSING_TYPE, NULL_TYPE, keyDocument, responseDocument);
            }
            // End if
            addToDiffset(diffElement);
        }
        // End if
        // The key has been fully processed: drop it from the caller's list.
        keyIterator.remove();
    }
    // end while keyIterator
    // Every response still in the list was not paired with any key. Record it
    // as PARTIALLY_CORRECT_TYPE if it partially matched some key above,
    // otherwise as SPURIOUS_TYPE, and remove it from the caller's list.
    DiffSetElement diffElem = null;
    Iterator<Annotation> responseIter = aResponseAnnotList.iterator();
    while (responseIter.hasNext()) {
        Annotation respAnnot = responseIter.next();
        if (responsePartiallySet.contains(respAnnot))
            diffElem = new DiffSetElement(null, respAnnot, NULL_TYPE, PARTIALLY_CORRECT_TYPE, keyDocument, responseDocument);
        else
            diffElem = new DiffSetElement(null, respAnnot, NULL_TYPE, SPURIOUS_TYPE, keyDocument, responseDocument);
        addToDiffset(diffElem);
        responseIter.remove();
    }
    // End while
    // CALCULATE ALL (NLP) MEASURES like:
    // Precision, Recall, FalsePositive and F-Measure
    // possible = correct + partially correct + missing
    // actual   = correct + partially correct + spurious
    int possible = // this comes from Key or Resp
    typeCounter[CORRECT_TYPE] + // this comes from Resp
    typeCounter[PARTIALLY_CORRECT_TYPE] + // this comes from Key
    typeCounter[MISSING_TYPE];
    int actual = // this comes from Key or Resp
    typeCounter[CORRECT_TYPE] + // this comes from Resp
    typeCounter[PARTIALLY_CORRECT_TYPE] + // this comes from Resp
    typeCounter[SPURIOUS_TYPE];
    /*
    if (actual != responseSize)
      Err.prln("AnnotDiff warning: The response size(" + responseSize +
      ") is not the same as the computed value of" +
    " actual(Correct[resp or key]+Partial[resp]+Spurious[resp]=" + actual +")");
*/
    // Precision: strict counts only correct, lenient also counts partially
    // correct. When actual is 0 the precision fields are left untouched.
    if (actual != 0) {
        precisionStrict = ((double) typeCounter[CORRECT_TYPE]) / ((double) actual);
        precisionLenient = ((double) (typeCounter[CORRECT_TYPE] + typeCounter[PARTIALLY_CORRECT_TYPE])) / ((double) actual);
        precisionAverage = (precisionStrict + precisionLenient) / 2;
    }
    // End if
    // Recall: same numerators as precision, divided by possible. When
    // possible is 0 the recall fields are left untouched.
    if (possible != 0) {
        recallStrict = ((double) typeCounter[CORRECT_TYPE]) / ((double) possible);
        recallLenient = ((double) (typeCounter[CORRECT_TYPE] + typeCounter[PARTIALLY_CORRECT_TYPE])) / ((double) possible);
        recallAverage = (recallStrict + recallLenient) / 2;
    }
    // End if
    // Number of response annotations of type annotationTypeForFalsePositive;
    // used as the denominator of the false-positive measures below.
    int no = 0;
    // Note: the outer if has no braces; the else below pairs with the inner
    // if (responseAnnotationSetNameFalsePoz == null).
    if (annotationTypeForFalsePositive != null)
        // Was it the default set ?
        if (responseAnnotationSetNameFalsePoz == null) {
            AnnotationSet aSet = responseDocument.getAnnotations().get(annotationTypeForFalsePositive);
            no = aSet == null ? 0 : aSet.size();
        } else {
            AnnotationSet aSet = responseDocument.getAnnotations(responseAnnotationSetNameFalsePoz).get(annotationTypeForFalsePositive);
            no = aSet == null ? 0 : aSet.size();
        }
    if (no != 0) {
        // No error here: the formula is the opposite to recall or precision
        // (strict counts spurious + partially correct, lenient only spurious)
        falsePositiveStrict = ((double) (typeCounter[SPURIOUS_TYPE] + typeCounter[PARTIALLY_CORRECT_TYPE])) / ((double) no);
        falsePositiveLenient = ((double) typeCounter[SPURIOUS_TYPE]) / ((double) no);
        falsePositiveAverage = (falsePositiveStrict + falsePositiveLenient) / 2;
    }
    // End if
    // Calculate F-Measure Strict: (P * R) / (weight * (P + R)), or 0.0 when
    // the denominator is zero
    double denominator = weight * (precisionStrict + recallStrict);
    if (denominator != 0)
        fMeasureStrict = (precisionStrict * recallStrict) / denominator;
    else
        fMeasureStrict = 0.0;
    // Calculate F-Measure Lenient (same formula on the lenient values)
    denominator = weight * (precisionLenient + recallLenient);
    if (denominator != 0)
        fMeasureLenient = (precisionLenient * recallLenient) / denominator;
    else
        fMeasureLenient = 0.0;
    // Calculate F-Measure Average
    fMeasureAverage = (fMeasureStrict + fMeasureLenient) / 2;
}
Also used : AnnotationSet(gate.AnnotationSet) Annotation(gate.Annotation)

Example 28 with AnnotationSet

use of gate.AnnotationSet in project gate-core by GateNLP.

the class TestXml method testUnpackMarkup.

// runCompleteTestWithAFormat
/**
 * Loads the XCES sample document ({@code tests/xml/xces.xml}) from the test
 * server and checks that its original-markups annotation set contains 758
 * annotations, then runs the node-ID and annotation-ID consistency helpers
 * on the loaded document.
 *
 * @throws Exception propagated from URL construction, document loading or
 *         the verification helpers
 */
public void testUnpackMarkup() throws Exception {
    URL xcesUrl = new URL(TestDocument.getTestServerName() + "tests/xml/xces.xml");
    gate.Document doc = gate.Factory.newDocument(xcesUrl, workingEncoding);
    AnnotationSet annotSet = doc.getAnnotations(GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
    assertEquals("For " + doc.getSourceUrl() + " the number of annotations should be:758", 758, annotSet.size());
    gate.corpora.TestDocument.verifyNodeIdConsistency(doc);
    // Verifies that the maximum annotation ID on the GATE doc is less than the
    // annotation ID generator of the document.
    verifyAnnotationIDGenerator(doc);
}
Also used : HashMap(java.util.HashMap) AnnotationSet(gate.AnnotationSet) Document(gate.Document) URL(java.net.URL)

Example 29 with AnnotationSet

use of gate.AnnotationSet in project gate-core by GateNLP.

the class TestXml method buildID2AnnotMap.

/**
 * Builds a lookup from annotation ID to annotation for a document by feeding
 * its default annotation set and then each of its named annotation sets to
 * {@code addAnnotSet2Map}.
 * NOTE(review): any detection of two annotations sharing an ID presumably
 * happens inside {@code addAnnotSet2Map}, which is not shown here.
 *
 * @param aDoc the GATE document to scan
 * @return the map filled by {@code addAnnotSet2Map}
 */
private Map<Integer, Annotation> buildID2AnnotMap(Document aDoc) {
    Map<Integer, Annotation> annotsById = new HashMap<Integer, Annotation>();
    // Default annotation set first.
    addAnnotSet2Map(aDoc.getAnnotations(), annotsById);
    // Nothing more to scan when the document reports no named sets.
    if (aDoc.getNamedAnnotationSets() == null) {
        return annotsById;
    }
    for (AnnotationSet namedSet : aDoc.getNamedAnnotationSets().values()) {
        addAnnotSet2Map(namedSet, annotsById);
    }
    return annotsById;
}
Also used : HashMap(java.util.HashMap) AnnotationSet(gate.AnnotationSet) Annotation(gate.Annotation)

Example 30 with AnnotationSet

use of gate.AnnotationSet in project gate-core by GateNLP.

the class TestSgml method testSgmlLoading.

// setUp
/**
 * Creates a non-markup-aware document from {@code tests/sgml/Hds.sgm},
 * checks that the document format resolved for it is an SGML format, unpacks
 * the markup with a null element map, and expects 1022 annotations in the
 * original-markups annotation set.
 *
 * @throws Exception propagated from resource creation or markup unpacking
 */
public void testSgmlLoading() throws Exception {
    // Placeholder assertion retained from the original test.
    assertTrue(true);
    // No element-name remapping is configured: the format receives a null map.
    Map<String, String> markupElementsMap = null;
    FeatureMap params = Factory.newFeatureMap();
    URL sgmlUrl = new URL(TestDocument.getTestServerName() + "tests/sgml/Hds.sgm");
    params.put(Document.DOCUMENT_URL_PARAMETER_NAME, sgmlUrl);
    params.put(Document.DOCUMENT_MARKUP_AWARE_PARAMETER_NAME, "false");
    Document doc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params);
    // Look up the document format responsible for this document.
    gate.DocumentFormat docFormat = gate.DocumentFormat.getDocumentFormat(doc, doc.getSourceUrl());
    boolean isSgmlFormat = docFormat instanceof gate.corpora.SgmlDocumentFormat;
    assertTrue("Bad document Format was produced. SgmlDocumentFormat was expected", isSgmlFormat);
    // Hand over the (null) map, then unpack the markup explicitly.
    docFormat.setMarkupElementsMap(markupElementsMap);
    docFormat.unpackMarkup(doc, "DocumentContent");
    AnnotationSet originalMarkups = doc.getAnnotations(GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
    String sizeMessage = "For " + doc.getSourceUrl() + " the number of annotations" + " should be:1022";
    assertEquals(sizeMessage, 1022, originalMarkups.size());
    // Check node ID consistency on the loaded document.
    gate.corpora.TestDocument.verifyNodeIdConsistency(doc);
}
Also used : FeatureMap(gate.FeatureMap) AnnotationSet(gate.AnnotationSet) Document(gate.Document) URL(java.net.URL)

Aggregations

AnnotationSet (gate.AnnotationSet): 43, Annotation (gate.Annotation): 27, ArrayList (java.util.ArrayList): 14, HashMap (java.util.HashMap): 11, HashSet (java.util.HashSet): 11, Document (gate.Document): 9, List (java.util.List): 8, FeatureMap (gate.FeatureMap): 7, InvalidOffsetException (gate.util.InvalidOffsetException): 6, AnnotationSetImpl (gate.annotation.AnnotationSetImpl): 5, Set (java.util.Set): 5, StatusListener (gate.event.StatusListener): 4, GateRuntimeException (gate.util.GateRuntimeException): 4, Point (java.awt.Point): 4, IOException (java.io.IOException): 4, URL (java.net.URL): 4, Map (java.util.Map): 4, Color (java.awt.Color): 3, TreeSet (java.util.TreeSet): 3, TestDocument (gate.corpora.TestDocument): 2