Use of gate.Annotation in the gate-core project by GateNLP.
Class CorefEditor, method annotationRemoved.
/**
 * Reacts to the removal of an annotation by deleting its id from the first
 * co-reference chain that contains it.
 * <p>
 * The chains are read from the document feature
 * {@code ANNIEConstants.DOCUMENT_COREF_FEATURE_NAME}. Nothing happens when
 * that feature is absent or is not a {@link Map}. When a chain containing the
 * id is found, the id is removed, the updated structure is written back to the
 * document features and {@code featureMapUpdated()} is called; only the first
 * matching chain is modified.
 *
 * @param ase the event carrying the annotation that was removed
 */
@Override
public void annotationRemoved(AnnotationSetEvent ase) {
  Annotation removedAnnotation = ase.getAnnotation();
  Integer removedId = removedAnnotation.getId();
  Object corefFeature = document.getFeatures().get(ANNIEConstants.DOCUMENT_COREF_FEATURE_NAME);
  // instanceof is false for null, so this single guard also covers a missing feature
  if (!(corefFeature instanceof Map)) {
    return;
  }
  @SuppressWarnings("unchecked") Map<String, List<List<Integer>>> chainsBySet = (Map<String, List<List<Integer>>>) corefFeature;
  Set<String> setNames = chainsBySet.keySet();
  if (setNames == null) {
    return;
  }
  for (String setName : setNames) {
    List<List<Integer>> chains = chainsBySet.get(setName);
    if (chains == null || chains.size() == 0) {
      continue;
    }
    for (int chainIndex = 0; chainIndex < chains.size(); chainIndex++) {
      List<Integer> chain = chains.get(chainIndex);
      if (!chain.contains(removedId)) {
        continue;
      }
      // removedId is an Integer, so this is remove(Object): it deletes the id itself, not a position
      chain.remove(removedId);
      chains.set(chainIndex, chain);
      chainsBySet.put(setName, chains);
      // explicitCall is raised only around the feature write
      // NOTE(review): presumably lets this editor's own feature-map listener ignore the write - confirm
      explicitCall = true;
      document.getFeatures().put(ANNIEConstants.DOCUMENT_COREF_FEATURE_NAME, chainsBySet);
      explicitCall = false;
      featureMapUpdated();
      return;
    }
  }
}
Use of gate.Annotation in the gate-core project by GateNLP.
Class CorpusQualityAssurance, method readSetsTypesFeatures.
/**
 * Update set lists.
 * <p>
 * Does nothing when this component is not showing. Otherwise it clears the
 * {@code corpusChanged} flag, prepares the progress bar on the Swing event
 * thread, shows the wait cursor and starts a minimum-priority thread named
 * "readSetsTypesFeatures". That thread records in
 * {@code docsSetsTypesFeatures}, for each document from {@code documentStart}
 * onwards, the feature names found for every annotation type of every
 * annotation set (including the default set, addressed by the empty name).
 * <p>
 * If the thread is interrupted it returns immediately after the current
 * document, without resetting the progress bar, the cursor or the reload
 * action.
 *
 * @param documentStart first document to read in the corpus,
 * the first document of the corpus is 0.
 */
protected void readSetsTypesFeatures(final int documentStart) {
  if (!isShowing()) {
    return;
  }
  corpusChanged = false;
  SwingUtilities.invokeLater(new Runnable() {
    @Override
    public void run() {
      progressBar.setMaximum(corpus.size() - 1);
      progressBar.setString("Read sets, types, features");
      reloadCacheAction.setEnabled(false);
    }
  });
  CorpusQualityAssurance.this.setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));
  Runnable cacheReader = new Runnable() {
    @Override
    public void run() {
      // the cache is current when it holds exactly one entry per corpus document name
      boolean cacheIsCurrent = docsSetsTypesFeatures.size() == corpus.getDocumentNames().size()
          && docsSetsTypesFeatures.keySet().containsAll(corpus.getDocumentNames());
      if (!cacheIsCurrent) {
        if (documentStart == 0) {
          // a full read starts from an empty cache; a resumed read keeps what is there
          docsSetsTypesFeatures.clear();
        }
        for (int docIndex = documentStart; docIndex < corpus.size(); docIndex++) {
          // remember the load state before corpus.get() so it can be restored afterwards
          boolean alreadyLoaded = corpus.isDocumentLoaded(docIndex);
          Document doc = corpus.get(docIndex);
          if (doc != null && doc.getAnnotationSetNames() != null) {
            TreeMap<String, TreeMap<String, TreeSet<String>>> featuresBySetThenType =
                new TreeMap<String, TreeMap<String, TreeSet<String>>>(collator);
            HashSet<String> annotationSetNames = new HashSet<String>(doc.getAnnotationSetNames());
            // the empty name addresses the default annotation set
            annotationSetNames.add("");
            for (String annotationSetName : annotationSetNames) {
              TreeMap<String, TreeSet<String>> featuresByType = new TreeMap<String, TreeSet<String>>(collator);
              AnnotationSet annotationsOfSet = doc.getAnnotations(annotationSetName);
              for (String annotationType : annotationsOfSet.getAllTypes()) {
                TreeSet<String> featureNames = new TreeSet<String>(collator);
                for (Annotation annotation : annotationsOfSet.get(annotationType)) {
                  for (Object featureKey : annotation.getFeatures().keySet()) {
                    featureNames.add((String) featureKey);
                  }
                }
                featuresByType.put(annotationType, featureNames);
              }
              featuresBySetThenType.put(annotationSetName, featuresByType);
            }
            docsSetsTypesFeatures.put(doc.getName(), featuresBySetThenType);
          }
          if (!alreadyLoaded) {
            corpus.unloadDocument(doc);
            Factory.deleteResource(doc);
          }
          final int progressValue = docIndex + 1;
          SwingUtilities.invokeLater(new Runnable() {
            @Override
            public void run() {
              progressBar.setValue(progressValue);
              if ((progressValue + 1) % 5 == 0) {
                // update the set list every 5 documents read
                updateSetList();
              }
            }
          });
          if (Thread.interrupted()) {
            // interrupted by compareAnnotation(): leave the UI as it is and stop
            return;
          }
        }
      }
      updateSetList();
      SwingUtilities.invokeLater(new Runnable() {
        @Override
        public void run() {
          progressBar.setValue(progressBar.getMinimum());
          progressBar.setString("");
          CorpusQualityAssurance.this.setCursor(Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR));
          reloadCacheAction.setEnabled(true);
        }
      });
    }
  };
  readSetsTypesFeaturesThread = new Thread(cacheReader, "readSetsTypesFeatures");
  readSetsTypesFeaturesThread.setPriority(Thread.MIN_PRIORITY);
  readSetsTypesFeaturesThread.start();
}
Use of gate.Annotation in the gate-core project by GateNLP.
Class CorpusQualityAssurance, method compareAnnotation.
/**
 * Compares the key and response annotation sets of every document in the
 * corpus and fills the result tables for the current measures type.
 * <p>
 * For {@code FSCORE_MEASURES} one {@code AnnotationDiffer} is computed per
 * document and annotation type; rows are added to the document table and the
 * annotation table, followed by summary rows. For
 * {@code CLASSIFICATION_MEASURES} a confusion matrix is computed per document
 * and for the whole corpus.
 * <p>
 * If the thread started by {@code readSetsTypesFeatures} is alive it is
 * interrupted first and restarted at the end from the progress value it had
 * reached. The selection controls are disabled while the comparison runs and
 * re-enabled afterwards; both changes are posted with
 * {@code SwingUtilities.invokeLater}.
 * <p>
 * NOTE(review): the table models are modified directly from the calling
 * thread - confirm which thread this method is expected to run on.
 */
protected void compareAnnotation() {
// -1 means the reader thread was not running and need not be restarted
int progressValuePrevious = -1;
if (readSetsTypesFeaturesThread != null && readSetsTypesFeaturesThread.isAlive()) {
// stop the thread that reads the sets, types and features
progressValuePrevious = progressBar.getValue();
readSetsTypesFeaturesThread.interrupt();
}
// lock the user interface while the comparison is running
SwingUtilities.invokeLater(new Runnable() {
@Override
public void run() {
progressBar.setMaximum(corpus.size() - 1);
progressBar.setString("Compare annotations");
setList.setEnabled(false);
setCheck.setEnabled(false);
typeList.setEnabled(false);
typeCheck.setEnabled(false);
featureList.setEnabled(false);
featureCheck.setEnabled(false);
optionsButton.setEnabled(false);
measureTabbedPane.setEnabled(false);
measureList.setEnabled(false);
exportToHtmlAction.setEnabled(false);
reloadCacheAction.setEnabled(false);
}
});
// BDM-based measures are used as soon as one selected measure name contains "BDM"
boolean useBdm = false;
if (measuresType == FSCORE_MEASURES) {
// note: these two collections are reset only for the F-score measures
differsByDocThenType.clear();
documentNames.clear();
for (Object measure : measureList.getSelectedValues()) {
if (((String) measure).contains("BDM")) {
useBdm = true;
break;
}
}
}
// per-document results, kept to build the summary rows after the loop
List<ClassificationMeasures> classificationMeasuresList = new ArrayList<ClassificationMeasures>();
List<OntologyMeasures> documentOntologyMeasuresList = new ArrayList<OntologyMeasures>();
List<OntologyMeasures> annotationOntologyMeasuresList = new ArrayList<OntologyMeasures>();
// for each document
for (int row = 0; row < corpus.size(); row++) {
// remember the load state before corpus.get() so it can be restored below
boolean documentWasLoaded = corpus.isDocumentLoaded(row);
Document document = corpus.get(row);
documentNames.add(document.getName());
// stay empty when the selected set does not exist in this document
Set<Annotation> keys = new HashSet<Annotation>();
Set<Annotation> responses = new HashSet<Annotation>();
// get annotations from selected annotation sets
if (keySetName.equals("[Default set]")) {
keys = document.getAnnotations();
} else if (document.getAnnotationSetNames() != null && document.getAnnotationSetNames().contains(keySetName)) {
keys = document.getAnnotations(keySetName);
}
if (responseSetName.equals("[Default set]")) {
responses = document.getAnnotations();
} else if (document.getAnnotationSetNames() != null && document.getAnnotationSetNames().contains(responseSetName)) {
responses = document.getAnnotations(responseSetName);
}
// NOTE(review): keys and responses are still read below, after the document
// has been unloaded and deleted - confirm the sets remain usable
if (!documentWasLoaded) {
// in case of datastore
corpus.unloadDocument(document);
Factory.deleteResource(document);
}
// add data to the fscore document table
if (measuresType == FSCORE_MEASURES) {
// use the selected types, or every listed type when nothing is selected
types.clear();
for (Object type : typeList.getSelectedValues()) {
types.add((String) type);
}
if (typeList.isSelectionEmpty()) {
for (int i = 0; i < typeList.getModel().getSize(); i++) {
types.add((String) typeList.getModel().getElementAt(i));
}
}
// only the selected features are significant for the comparison
Set<String> featureSet = new HashSet<String>();
for (Object feature : featureList.getSelectedValues()) {
featureSet.add((String) feature);
}
HashMap<String, AnnotationDiffer> differsByType = new HashMap<String, AnnotationDiffer>();
AnnotationDiffer differ;
// remain empty for every type when the corresponding set is empty
Set<Annotation> keysIter = new HashSet<Annotation>();
Set<Annotation> responsesIter = new HashSet<Annotation>();
for (String type : types) {
// types cannot be empty inside this loop, so only the first condition matters;
// a non-empty keys/responses was assigned from getAnnotations above, hence the cast
if (!keys.isEmpty() && !types.isEmpty()) {
keysIter = ((AnnotationSet) keys).get(type);
}
if (!responses.isEmpty() && !types.isEmpty()) {
responsesIter = ((AnnotationSet) responses).get(type);
}
differ = new AnnotationDiffer();
differ.setSignificantFeaturesSet(featureSet);
// compare
differ.calculateDiff(keysIter, responsesIter);
differsByType.put(type, differ);
}
differsByDocThenType.add(differsByType);
// one differ built from all the per-type differs of this document
differ = new AnnotationDiffer(differsByType.values());
List<String> measuresRow;
// the last entry of documentNames is the current document, added above
if (useBdm) {
OntologyMeasures ontologyMeasures = new OntologyMeasures();
ontologyMeasures.setBdmFile(bdmFileUrl);
ontologyMeasures.calculateBdm(differsByType.values());
documentOntologyMeasuresList.add(ontologyMeasures);
measuresRow = ontologyMeasures.getMeasuresRow(measureList.getSelectedValues(), documentNames.get(documentNames.size() - 1));
} else {
measuresRow = differ.getMeasuresRow(measureList.getSelectedValues(), documentNames.get(documentNames.size() - 1));
}
documentTableModel.addRow(measuresRow.toArray());
// add data to the classification document table
// (documents with an empty key or response set are skipped)
} else if (measuresType == CLASSIFICATION_MEASURES && !keys.isEmpty() && !responses.isEmpty()) {
ClassificationMeasures classificationMeasures = new ClassificationMeasures();
classificationMeasures.calculateConfusionMatrix((AnnotationSet) keys, (AnnotationSet) responses, (String) typeList.getSelectedValue(), (String) featureList.getSelectedValue(), verboseOptionCheckBox.isSelected());
classificationMeasuresList.add(classificationMeasures);
List<String> measuresRow = classificationMeasures.getMeasuresRow(measure2List.getSelectedValues(), documentNames.get(documentNames.size() - 1));
document2TableModel.addRow(measuresRow.toArray());
List<List<String>> matrix = classificationMeasures.getConfusionMatrix(documentNames.get(documentNames.size() - 1));
for (List<String> matrixRow : matrix) {
// make sure the table has at least as many columns as the matrix has rows
while (confusionTableModel.getColumnCount() < matrix.size()) {
confusionTableModel.addColumn(" ");
}
confusionTableModel.addRow(matrixRow.toArray());
}
}
final int progressValue = row + 1;
SwingUtilities.invokeLater(new Runnable() {
@Override
public void run() {
progressBar.setValue(progressValue);
}
});
}
// add data to the fscore annotation table
if (measuresType == FSCORE_MEASURES) {
// 'types' holds the types collected for the last document processed above
for (String type : types) {
// gather the differ of this type from every document
ArrayList<AnnotationDiffer> differs = new ArrayList<AnnotationDiffer>();
for (HashMap<String, AnnotationDiffer> differsByType : differsByDocThenType) {
differs.add(differsByType.get(type));
}
List<String> measuresRow;
if (useBdm) {
OntologyMeasures ontologyMeasures = new OntologyMeasures();
ontologyMeasures.setBdmFile(bdmFileUrl);
ontologyMeasures.calculateBdm(differs);
annotationOntologyMeasuresList.add(ontologyMeasures);
measuresRow = ontologyMeasures.getMeasuresRow(measureList.getSelectedValues(), type);
} else {
AnnotationDiffer differ = new AnnotationDiffer(differs);
measuresRow = differ.getMeasuresRow(measureList.getSelectedValues(), type);
}
annotationTableModel.addRow(measuresRow.toArray());
}
}
// add summary rows to the fscore tables
if (measuresType == FSCORE_MEASURES) {
if (useBdm) {
OntologyMeasures ontologyMeasures = new OntologyMeasures(documentOntologyMeasuresList);
printSummary(ontologyMeasures, documentTableModel, 5, documentTableModel.getRowCount(), measureList.getSelectedValues());
ontologyMeasures = new OntologyMeasures(annotationOntologyMeasuresList);
printSummary(ontologyMeasures, annotationTableModel, 5, annotationTableModel.getRowCount(), measureList.getSelectedValues());
} else {
// the same corpus-wide differ feeds the summary of both tables
List<AnnotationDiffer> differs = new ArrayList<AnnotationDiffer>();
for (Map<String, AnnotationDiffer> differsByType : differsByDocThenType) {
differs.addAll(differsByType.values());
}
AnnotationDiffer differ = new AnnotationDiffer(differs);
printSummary(differ, documentTableModel, 5, documentTableModel.getRowCount(), measureList.getSelectedValues());
printSummary(differ, annotationTableModel, 5, annotationTableModel.getRowCount(), measureList.getSelectedValues());
}
// add summary rows to the classification tables
} else if (measuresType == CLASSIFICATION_MEASURES) {
ClassificationMeasures classificationMeasures = new ClassificationMeasures(classificationMeasuresList);
printSummary(classificationMeasures, document2TableModel, 3, document2TableModel.getRowCount(), measure2List.getSelectedValues());
List<List<String>> matrix = classificationMeasures.getConfusionMatrix("Whole corpus");
// the whole-corpus matrix is inserted at the top, above the per-document matrices
int insertionRow = 0;
for (List<String> row : matrix) {
while (confusionTableModel.getColumnCount() < matrix.size()) {
confusionTableModel.addColumn(" ");
}
confusionTableModel.insertRow(insertionRow++, row.toArray());
}
}
// unlock the user interface and reset the progress bar
SwingUtilities.invokeLater(new Runnable() {
@Override
public void run() {
progressBar.setValue(progressBar.getMinimum());
progressBar.setString("");
setList.setEnabled(true);
setCheck.setEnabled(true);
typeList.setEnabled(true);
typeCheck.setEnabled(true);
featureList.setEnabled(true);
featureCheck.setEnabled(true);
optionsButton.setEnabled(true);
measureTabbedPane.setEnabled(true);
measureList.setEnabled(true);
exportToHtmlAction.setEnabled(true);
reloadCacheAction.setEnabled(true);
}
});
if (progressValuePrevious > -1) {
// restart the thread where it was interrupted
readSetsTypesFeatures(progressValuePrevious);
}
}
Use of gate.Annotation in the gate-core project by GateNLP.
Class AnnotationMerging, method mergeAnnotation.
/**
 * Merge all annotation sets from an array. If one annotation is in at least
 * {@code numMinK} annotation sets, then put it into the merging annotation
 * set.
 * <p>
 * Two annotations from different sets are taken to be the same when
 * {@code isCompatible} holds for them with respect to {@code nameFeat}. An
 * annotation matched in a later set is consumed from the working copy of that
 * set, so it is not considered again. Once all sets are processed,
 * {@code removeDuplicate(mergeAnns)} is applied to the result.
 *
 * @param annsArr the annotation sets to merge, one per annotator; {@code null}
 *          entries are skipped. The sets themselves are not modified, only
 *          working copies are.
 * @param nameFeat name of the feature used in the compatibility check; when
 *          {@code null} an empty feature-name set is used
 * @param mergeAnns receives the merged annotations, each mapped to the
 *          indices of the sets containing it, joined with '-'
 * @param numMinK minimal number of sets that must contain an annotation;
 *          values below 1 are treated as 1
 * @param isTheSameInstances when {@code true} and {@code nameFeat} is not
 *          {@code null}, an annotation found in fewer than {@code numMinK}
 *          sets is still added, after the {@code nameFeat} feature has been
 *          removed from it (this modifies the annotation itself)
 */
public static void mergeAnnotation(AnnotationSet[] annsArr, String nameFeat, HashMap<Annotation, String> mergeAnns, int numMinK, boolean isTheSameInstances) {
  int numA = annsArr.length;
  // First copy the annotation sets into a temp array, so that matched
  // annotations can be consumed without touching the caller's sets
  @SuppressWarnings({ "unchecked", "rawtypes" }) Set<Annotation>[] annsArrTemp = new Set[numA];
  for (int i = 0; i < numA; ++i) {
    if (annsArr[i] != null) {
      annsArrTemp[i] = new HashSet<Annotation>(annsArr[i]);
    }
  }
  HashSet<String> featSet = new HashSet<String>();
  if (nameFeat != null) {
    featSet.add(nameFeat);
  }
  int minK = Math.max(numMinK, 1);
  // an annotation first seen in one of the last (minK - 1) sets can be in
  // at most (minK - 1) sets, so those sets are not used as starting points
  for (int iA = 0; iA < numA - minK + 1; ++iA) {
    if (annsArrTemp[iA] == null) {
      continue;
    }
    for (Annotation ann : annsArrTemp[iA]) {
      int numContained = 1;
      StringBuilder featAdd = new StringBuilder();
      featAdd.append(iA);
      for (int i = iA + 1; i < numA; ++i) {
        if (annsArrTemp[i] == null) {
          continue;
        }
        Annotation matched = null;
        for (Annotation candidate : annsArrTemp[i]) {
          if (candidate.isCompatible(ann, featSet)) {
            matched = candidate;
            break;
          }
        }
        if (matched != null) {
          ++numContained;
          featAdd.append('-').append(i);
          // removed after the inner loop so the set is not modified while iterating it
          annsArrTemp[i].remove(matched);
        }
      }
      if (numContained >= minK) {
        mergeAnns.put(ann, featAdd.toString());
      } else if (isTheSameInstances && nameFeat != null) {
        ann.getFeatures().remove(nameFeat);
        mergeAnns.put(ann, featAdd.toString());
      }
    }
  }
  // Remove the annotation in the same place
  removeDuplicate(mergeAnns);
}
Use of gate.Annotation in the gate-core project by GateNLP.
Class AnnotationMerging, method mergeAnnotationMajority.
/**
 * Merge all annotation sets from an array. If one annotation is agreed by
 * the majority of the annotators, then put it into the merging annotation set.
 * <p>
 * Annotations from different sets are grouped when they are coextensive. Within
 * a group the value of the {@code nameFeat} feature with the most supporters
 * wins; the group is merged when that number of supporters is at least the
 * number of annotators that have no coextensive annotation. An annotation
 * matched in a later set is consumed from the working copy of that set.
 * <p>
 * When {@code nameFeat} is {@code null} the work is delegated to
 * {@code mergeAnnogationMajorityNoFeat}.
 *
 * @param annsArr the annotation sets to merge, one per annotator; {@code null}
 *          entries count as disagreeing. The sets themselves are not modified,
 *          only working copies are.
 * @param nameFeat name of the feature whose value the annotators must agree on
 * @param mergeAnns receives the merged annotations. An agreed annotation is
 *          mapped to the indices of the annotators supporting the winning
 *          feature value, joined with '-'.
 * @param isTheSameInstances when {@code true}, an annotation without majority
 *          is still added, with the {@code nameFeat} feature removed from it
 *          (this modifies the annotation itself) and mapped to the indices of
 *          the disagreeing annotators, joined with '-'
 */
public static void mergeAnnotationMajority(AnnotationSet[] annsArr, String nameFeat, HashMap<Annotation, String> mergeAnns, boolean isTheSameInstances) {
  int numA = annsArr.length;
  if (nameFeat == null) {
    mergeAnnogationMajorityNoFeat(annsArr, mergeAnns, isTheSameInstances);
    return;
  }
  // First copy the annotation sets into a temp array, so that matched
  // annotations can be consumed without touching the caller's sets
  @SuppressWarnings({ "unchecked", "rawtypes" }) Set<Annotation>[] annsArrTemp = new Set[numA];
  for (int i = 0; i < numA; ++i) {
    if (annsArr[i] != null) {
      annsArrTemp[i] = new HashSet<Annotation>(annsArr[i]);
    }
  }
  for (int iA = 0; iA < numA; ++iA) {
    if (annsArrTemp[iA] == null) {
      continue;
    }
    for (Annotation ann : annsArrTemp[iA]) {
      // Anything coextensive in an earlier set would already have consumed
      // this annotation, so the iA earlier annotators all disagree
      int numDisagreed = iA;
      StringBuilder featDisa = new StringBuilder();
      if (iA > 0) {
        featDisa.append("0");
        for (int i = 1; i < iA; ++i) {
          featDisa.append('-').append(i);
        }
      }
      // feature value (possibly null) -> '-'-joined indices of its supporters
      HashMap<String, String> featOthers = new HashMap<String, String>();
      // feature value (possibly null) -> first annotation seen with that value
      HashMap<String, Annotation> annAll = new HashMap<String, Annotation>();
      Object ownValue = ann.getFeatures().get(nameFeat);
      String featTh = ownValue == null ? null : ownValue.toString();
      featOthers.put(featTh, Integer.toString(iA));
      annAll.put(featTh, ann);
      for (int i = iA + 1; i < numA; ++i) {
        Annotation matched = null;
        if (annsArrTemp[i] != null) {
          for (Annotation candidate : annsArrTemp[i]) {
            if (candidate.coextensive(ann)) {
              matched = candidate;
              break;
            }
          }
        }
        if (matched != null) {
          Object otherValue = matched.getFeatures().get(nameFeat);
          String featValue = otherValue == null ? null : otherValue.toString();
          if (!featOthers.containsKey(featValue)) {
            featOthers.put(featValue, Integer.toString(i));
            annAll.put(featValue, matched);
          } else {
            featOthers.put(featValue, featOthers.get(featValue) + "-" + i);
          }
          // removed after the inner loop so the set is not modified while iterating it
          annsArrTemp[i].remove(matched);
        } else {
          if (numDisagreed == 0) {
            featDisa.append(i);
          } else {
            featDisa.append('-').append(i);
          }
          ++numDisagreed;
        }
      }
      // end of the loop for the following annotation set
      // pick the feature value with the most supporters; on a tie the first
      // one in the map's iteration order is kept
      int numAgreed = -1;
      String agreeFeat = null;
      for (String featValue : featOthers.keySet()) {
        // supporters are joined with '-', so their number is separators + 1
        String supporters = featOthers.get(featValue);
        int num = 1;
        for (int pos = supporters.indexOf('-'); pos >= 0; pos = supporters.indexOf('-', pos + 1)) {
          ++num;
        }
        if (numAgreed < num) {
          numAgreed = num;
          agreeFeat = featValue;
        }
      }
      if (numAgreed >= numDisagreed) {
        mergeAnns.put(annAll.get(agreeFeat), featOthers.get(agreeFeat));
      } else if (isTheSameInstances) {
        if (ann.getFeatures().get(nameFeat) != null) {
          ann.getFeatures().remove(nameFeat);
        }
        mergeAnns.put(ann, featDisa.toString());
      }
    }
    // for each ann in the current annotation set
  }
}
Aggregations