Use of gate.util.AnnotationDiffer in project gate-core by GateNLP.
From the class CorpusQualityAssurance, method compareAnnotation.
/**
 * Compares the key and response annotation sets of every document of the
 * corpus and fills the result tables for the current measures type.
 * <p>
 * If the thread that reads the sets, types and features is alive it is
 * interrupted first; once the comparison is finished it is restarted through
 * readSetsTypesFeatures(int) with the progress bar value recorded before the
 * interruption. While the comparison runs, the selection lists, check boxes,
 * options button, measure widgets and the export/reload actions are disabled;
 * they are enabled again at the end. All these widget updates are posted with
 * SwingUtilities.invokeLater.
 * <p>
 * For FSCORE_MEASURES one AnnotationDiffer is computed per document and per
 * type; one row per document is added to documentTableModel and one row per
 * type to annotationTableModel, followed by the summary rows written by
 * printSummary. If one of the selected measure names contains "BDM" the rows
 * are produced by OntologyMeasures configured with bdmFileUrl instead.
 * <p>
 * For CLASSIFICATION_MEASURES a ClassificationMeasures is computed for every
 * document whose key and response sets are both non-empty; its measures go to
 * document2TableModel and its confusion matrix to confusionTableModel. The
 * confusion matrix of the whole corpus is inserted at the top of
 * confusionTableModel afterwards.
 */
protected void compareAnnotation() {
// -1 means the reader thread was not running, so nothing has to be restarted
int progressValuePrevious = -1;
if (readSetsTypesFeaturesThread != null && readSetsTypesFeaturesThread.isAlive()) {
// stop the thread that reads the sets, types and features
progressValuePrevious = progressBar.getValue();
readSetsTypesFeaturesThread.interrupt();
}
// disable the controls while the comparison is running
SwingUtilities.invokeLater(new Runnable() {
@Override
public void run() {
// NOTE(review): the value set after each document below is row + 1, i.e. it
// goes up to corpus.size(), while the maximum here is corpus.size() - 1 --
// confirm that this is intended
progressBar.setMaximum(corpus.size() - 1);
progressBar.setString("Compare annotations");
setList.setEnabled(false);
setCheck.setEnabled(false);
typeList.setEnabled(false);
typeCheck.setEnabled(false);
featureList.setEnabled(false);
featureCheck.setEnabled(false);
optionsButton.setEnabled(false);
measureTabbedPane.setEnabled(false);
measureList.setEnabled(false);
exportToHtmlAction.setEnabled(false);
reloadCacheAction.setEnabled(false);
}
});
// true when at least one selected f-score measure name contains "BDM"
boolean useBdm = false;
if (measuresType == FSCORE_MEASURES) {
// NOTE(review): documentNames is appended to for every measures type in the
// loop below but only cleared here, in f-score mode; only its last element is
// read in this method -- confirm other readers cope with the growth
differsByDocThenType.clear();
documentNames.clear();
for (Object measure : measureList.getSelectedValues()) {
if (((String) measure).contains("BDM")) {
useBdm = true;
break;
}
}
}
// per document results, kept to build the corpus wide summaries at the end
List<ClassificationMeasures> classificationMeasuresList = new ArrayList<ClassificationMeasures>();
List<OntologyMeasures> documentOntologyMeasuresList = new ArrayList<OntologyMeasures>();
List<OntologyMeasures> annotationOntologyMeasuresList = new ArrayList<OntologyMeasures>();
// for each document
for (int row = 0; row < corpus.size(); row++) {
// recorded before corpus.get(row) so that a document that was not loaded
// beforehand can be unloaded again below (presumably get() loads it)
boolean documentWasLoaded = corpus.isDocumentLoaded(row);
Document document = corpus.get(row);
documentNames.add(document.getName());
// stay empty when the named set does not exist in this document
Set<Annotation> keys = new HashSet<Annotation>();
Set<Annotation> responses = new HashSet<Annotation>();
// get annotations from selected annotation sets
if (keySetName.equals("[Default set]")) {
keys = document.getAnnotations();
} else if (document.getAnnotationSetNames() != null && document.getAnnotationSetNames().contains(keySetName)) {
keys = document.getAnnotations(keySetName);
}
if (responseSetName.equals("[Default set]")) {
responses = document.getAnnotations();
} else if (document.getAnnotationSetNames() != null && document.getAnnotationSetNames().contains(responseSetName)) {
responses = document.getAnnotations(responseSetName);
}
if (!documentWasLoaded) {
// in case of datastore
// NOTE(review): keys and responses obtained from this document are still
// read below, after the document has been unloaded and deleted -- confirm
// that the annotation sets remain usable at that point
corpus.unloadDocument(document);
Factory.deleteResource(document);
}
// add data to the fscore document table
if (measuresType == FSCORE_MEASURES) {
// types to compare: the selected ones, or every type of the list model
// when nothing is selected
types.clear();
for (Object type : typeList.getSelectedValues()) {
types.add((String) type);
}
if (typeList.isSelectionEmpty()) {
for (int i = 0; i < typeList.getModel().getSize(); i++) {
types.add((String) typeList.getModel().getElementAt(i));
}
}
// features that are passed to each differ as significant features
Set<String> featureSet = new HashSet<String>();
for (Object feature : featureList.getSelectedValues()) {
featureSet.add((String) feature);
}
HashMap<String, AnnotationDiffer> differsByType = new HashMap<String, AnnotationDiffer>();
AnnotationDiffer differ;
// only reassigned when keys/responses are not empty, otherwise they stay
// the empty sets created here
Set<Annotation> keysIter = new HashSet<Annotation>();
Set<Annotation> responsesIter = new HashSet<Annotation>();
for (String type : types) {
// the !types.isEmpty() test is always true inside this loop; a non-empty
// keys/responses can only come from document.getAnnotations(...), which
// the casts below assume to be an AnnotationSet
if (!keys.isEmpty() && !types.isEmpty()) {
keysIter = ((AnnotationSet) keys).get(type);
}
if (!responses.isEmpty() && !types.isEmpty()) {
responsesIter = ((AnnotationSet) responses).get(type);
}
differ = new AnnotationDiffer();
differ.setSignificantFeaturesSet(featureSet);
// compare
differ.calculateDiff(keysIter, responsesIter);
differsByType.put(type, differ);
}
differsByDocThenType.add(differsByType);
// differ built from all the per type differs of this document
differ = new AnnotationDiffer(differsByType.values());
List<String> measuresRow;
// the last element of documentNames is the name of the current document
if (useBdm) {
OntologyMeasures ontologyMeasures = new OntologyMeasures();
ontologyMeasures.setBdmFile(bdmFileUrl);
ontologyMeasures.calculateBdm(differsByType.values());
documentOntologyMeasuresList.add(ontologyMeasures);
measuresRow = ontologyMeasures.getMeasuresRow(measureList.getSelectedValues(), documentNames.get(documentNames.size() - 1));
} else {
measuresRow = differ.getMeasuresRow(measureList.getSelectedValues(), documentNames.get(documentNames.size() - 1));
}
documentTableModel.addRow(measuresRow.toArray());
// add data to the classification document table
} else if (measuresType == CLASSIFICATION_MEASURES && !keys.isEmpty() && !responses.isEmpty()) {
// documents with an empty key or response set add no classification row
ClassificationMeasures classificationMeasures = new ClassificationMeasures();
classificationMeasures.calculateConfusionMatrix((AnnotationSet) keys, (AnnotationSet) responses, (String) typeList.getSelectedValue(), (String) featureList.getSelectedValue(), verboseOptionCheckBox.isSelected());
classificationMeasuresList.add(classificationMeasures);
List<String> measuresRow = classificationMeasures.getMeasuresRow(measure2List.getSelectedValues(), documentNames.get(documentNames.size() - 1));
document2TableModel.addRow(measuresRow.toArray());
List<List<String>> matrix = classificationMeasures.getConfusionMatrix(documentNames.get(documentNames.size() - 1));
for (List<String> matrixRow : matrix) {
// add blank-titled columns until the table has at least matrix.size() columns
while (confusionTableModel.getColumnCount() < matrix.size()) {
confusionTableModel.addColumn(" ");
}
confusionTableModel.addRow(matrixRow.toArray());
}
}
// one progress step per processed document
final int progressValue = row + 1;
SwingUtilities.invokeLater(new Runnable() {
@Override
public void run() {
progressBar.setValue(progressValue);
}
});
}
// add data to the fscore annotation table
if (measuresType == FSCORE_MEASURES) {
// types still holds what was collected for the last document in the loop above
for (String type : types) {
// gather the differ of this type from every document
ArrayList<AnnotationDiffer> differs = new ArrayList<AnnotationDiffer>();
for (HashMap<String, AnnotationDiffer> differsByType : differsByDocThenType) {
differs.add(differsByType.get(type));
}
List<String> measuresRow;
if (useBdm) {
OntologyMeasures ontologyMeasures = new OntologyMeasures();
ontologyMeasures.setBdmFile(bdmFileUrl);
ontologyMeasures.calculateBdm(differs);
annotationOntologyMeasuresList.add(ontologyMeasures);
measuresRow = ontologyMeasures.getMeasuresRow(measureList.getSelectedValues(), type);
} else {
AnnotationDiffer differ = new AnnotationDiffer(differs);
measuresRow = differ.getMeasuresRow(measureList.getSelectedValues(), type);
}
annotationTableModel.addRow(measuresRow.toArray());
}
}
// add summary rows to the fscore tables
if (measuresType == FSCORE_MEASURES) {
// printSummary receives 5 as columnGroupSize and the current row count as
// insertion row, so the summary rows go after the existing rows
if (useBdm) {
OntologyMeasures ontologyMeasures = new OntologyMeasures(documentOntologyMeasuresList);
printSummary(ontologyMeasures, documentTableModel, 5, documentTableModel.getRowCount(), measureList.getSelectedValues());
ontologyMeasures = new OntologyMeasures(annotationOntologyMeasuresList);
printSummary(ontologyMeasures, annotationTableModel, 5, annotationTableModel.getRowCount(), measureList.getSelectedValues());
} else {
// a single differ built from every per document, per type differ is used
// for both tables
List<AnnotationDiffer> differs = new ArrayList<AnnotationDiffer>();
for (Map<String, AnnotationDiffer> differsByType : differsByDocThenType) {
differs.addAll(differsByType.values());
}
AnnotationDiffer differ = new AnnotationDiffer(differs);
printSummary(differ, documentTableModel, 5, documentTableModel.getRowCount(), measureList.getSelectedValues());
printSummary(differ, annotationTableModel, 5, annotationTableModel.getRowCount(), measureList.getSelectedValues());
}
// add summary rows to the classification tables
} else if (measuresType == CLASSIFICATION_MEASURES) {
ClassificationMeasures classificationMeasures = new ClassificationMeasures(classificationMeasuresList);
printSummary(classificationMeasures, document2TableModel, 3, document2TableModel.getRowCount(), measure2List.getSelectedValues());
// the confusion matrix of the whole corpus is inserted from row 0 on, above
// the per document matrices
List<List<String>> matrix = classificationMeasures.getConfusionMatrix("Whole corpus");
int insertionRow = 0;
for (List<String> row : matrix) {
while (confusionTableModel.getColumnCount() < matrix.size()) {
confusionTableModel.addColumn(" ");
}
confusionTableModel.insertRow(insertionRow++, row.toArray());
}
}
// reset the progress bar and enable the controls again
SwingUtilities.invokeLater(new Runnable() {
@Override
public void run() {
progressBar.setValue(progressBar.getMinimum());
progressBar.setString("");
setList.setEnabled(true);
setCheck.setEnabled(true);
typeList.setEnabled(true);
typeCheck.setEnabled(true);
featureList.setEnabled(true);
featureCheck.setEnabled(true);
optionsButton.setEnabled(true);
measureTabbedPane.setEnabled(true);
measureList.setEnabled(true);
exportToHtmlAction.setEnabled(true);
reloadCacheAction.setEnabled(true);
}
});
if (progressValuePrevious > -1) {
// restart the thread where it was interrupted
readSetsTypesFeatures(progressValuePrevious);
}
}
Use of gate.util.AnnotationDiffer in project gate-core by GateNLP.
From the class CorpusQualityAssurance, method printSummary.
/**
 * Inserts a "Macro summary" row and a "Micro summary" row into a result table.
 * <p>
 * The macro row is inserted at {@code insertionRow}. For every column from
 * {@code columnGroupSize} on it holds the sum of the cells that parse as a
 * float divided by the number of rows the table had before the insertion;
 * the columns before {@code columnGroupSize} are left empty.
 * <p>
 * The micro row is inserted at {@code insertionRow + 1}. Its columns
 * {@code 1 .. columnGroupSize - 1} hold the integer sums of the data rows
 * (the macro row is skipped); the remaining values are read from
 * {@code measureObject}, one group of values per entry of {@code measures}.
 * <p>
 * Cells are expected to hold {@code String} values. Empty ({@code null}) and
 * non-numeric cells are ignored when summing.
 *
 * @param measureObject an AnnotationDiffer, a ClassificationMeasures or an
 *          OntologyMeasures providing the values of the micro summary; the
 *          names in {@code measures} must match its type, otherwise a
 *          NullPointerException is thrown when the value is read
 * @param tableModel the table to summarize and to insert the two rows into
 * @param columnGroupSize index of the first measure column; the columns
 *          before it, except column 0, are summed as counts
 * @param insertionRow row index where the macro summary is inserted
 * @param measures names of the measures to write in the micro summary, in
 *          column order; names with a '-' carry the beta value between the
 *          second character and the '-'
 */
protected void printSummary(Object measureObject, DefaultTableModel tableModel, int columnGroupSize, int insertionRow, Object[] measures) {
  AnnotationDiffer differ = null;
  ClassificationMeasures classificationMeasures = null;
  OntologyMeasures ontologyMeasures = null;
  if (measureObject instanceof AnnotationDiffer) {
    differ = (AnnotationDiffer) measureObject;
  } else if (measureObject instanceof ClassificationMeasures) {
    classificationMeasures = (ClassificationMeasures) measureObject;
  } else if (measureObject instanceof OntologyMeasures) {
    ontologyMeasures = (OntologyMeasures) measureObject;
  }
  NumberFormat f = NumberFormat.getInstance(Locale.ENGLISH);
  f.setMaximumFractionDigits(4);
  f.setMinimumFractionDigits(4);
  f.setRoundingMode(RoundingMode.HALF_UP);
  List<Object> values = new ArrayList<Object>();

  // average measures by document
  values.add("Macro summary");
  // the row count cannot change before the macro row is inserted below
  final int dataRowCount = tableModel.getRowCount();
  for (int col = 1; col < tableModel.getColumnCount(); col++) {
    if (col < columnGroupSize) {
      values.add("");
      continue;
    }
    float sumF = 0;
    for (int row = 0; row < dataRowCount; row++) {
      Object cell = tableModel.getValueAt(row, col);
      if (cell == null) {
        // Float.parseFloat(null) throws a NullPointerException, not a
        // NumberFormatException: treat an empty cell like a non-numeric one
        continue;
      }
      try {
        sumF += Float.parseFloat((String) cell);
      } catch (NumberFormatException ignored) {
        // non-numeric cells do not contribute to the sum
      }
    }
    values.add(f.format(sumF / dataRowCount));
  }
  tableModel.insertRow(insertionRow, values.toArray());

  // sum counts and recalculate measures like the corpus is one document
  values.clear();
  values.add("Micro summary");
  for (int col = 1; col < columnGroupSize; col++) {
    int sum = 0;
    for (int row = 0; row < tableModel.getRowCount(); row++) {
      if (row == insertionRow) {
        // skip the macro summary row itself, wherever it was inserted
        continue;
      }
      Object cell = tableModel.getValueAt(row, col);
      if (cell == null) {
        continue;
      }
      try {
        sum += Integer.parseInt((String) cell);
      } catch (NumberFormatException ignored) {
        // non-numeric cells do not contribute to the sum
      }
    }
    values.add(Integer.toString(sum));
  }
  if (ontologyMeasures != null) {
    // the non BDM measures are read from a differ built from the differs
    // held by the ontology measures
    List<AnnotationDiffer> differs = new ArrayList<AnnotationDiffer>(ontologyMeasures.getDifferByTypeMap().values());
    differ = new AnnotationDiffer(differs);
  }
  for (Object object : measures) {
    String measure = (String) object;
    int index = measure.indexOf('-');
    // beta defaults to 1 when the measure name carries no '-'
    double beta = (index == -1) ? 1 : Double.parseDouble(measure.substring(1, index));
    if (measure.endsWith("strict")) {
      values.add(f.format(differ.getPrecisionStrict()));
      values.add(f.format(differ.getRecallStrict()));
      values.add(f.format(differ.getFMeasureStrict(beta)));
    } else if (measure.endsWith("strict BDM")) {
      values.add(f.format(ontologyMeasures.getPrecisionStrictBdm()));
      values.add(f.format(ontologyMeasures.getRecallStrictBdm()));
      values.add(f.format(ontologyMeasures.getFMeasureStrictBdm(beta)));
    } else if (measure.endsWith("lenient")) {
      values.add(f.format(differ.getPrecisionLenient()));
      values.add(f.format(differ.getRecallLenient()));
      values.add(f.format(differ.getFMeasureLenient(beta)));
    } else if (measure.endsWith("lenient BDM")) {
      values.add(f.format(ontologyMeasures.getPrecisionLenientBdm()));
      values.add(f.format(ontologyMeasures.getRecallLenientBdm()));
      values.add(f.format(ontologyMeasures.getFMeasureLenientBdm(beta)));
    } else if (measure.endsWith("average")) {
      values.add(f.format(differ.getPrecisionAverage()));
      values.add(f.format(differ.getRecallAverage()));
      values.add(f.format(differ.getFMeasureAverage(beta)));
    } else if (measure.endsWith("average BDM")) {
      values.add(f.format(ontologyMeasures.getPrecisionAverageBdm()));
      values.add(f.format(ontologyMeasures.getRecallAverageBdm()));
      values.add(f.format(ontologyMeasures.getFMeasureAverageBdm(beta)));
    } else if (measure.equals("Observed agreement")) {
      values.add(f.format(classificationMeasures.getObservedAgreement()));
    } else if (measure.equals("Cohen's Kappa")) {
      // an undefined kappa is shown as an empty cell
      float result = classificationMeasures.getKappaCohen();
      values.add(Float.isNaN(result) ? "" : f.format(result));
    } else if (measure.equals("Pi's Kappa")) {
      float result = classificationMeasures.getKappaPi();
      values.add(Float.isNaN(result) ? "" : f.format(result));
    }
  }
  tableModel.insertRow(insertionRow + 1, values.toArray());
}
Use of gate.util.AnnotationDiffer in project gate-core by GateNLP.
From the class AnnotationDiffGUI, method initLocalData.
/**
 * Puts the data fields of this GUI into their initial state: no key or
 * response document, a new differ, empty pairing, copy-flag and significant
 * feature collections, and the standalone flag derived from the root
 * component.
 */
protected void initLocalData() {
  // no key / response document chosen yet
  keyDoc = null;
  resDoc = null;

  // new differ together with the empty containers that go with it
  differ = new AnnotationDiffer();
  pairings = new ArrayList<AnnotationDiffer.Pairing>();
  significantFeatures = new HashSet<String>();

  // per row copy flags, one list for the key side and one for the response side
  keyCopyValueRows = new ArrayList<Boolean>();
  resCopyValueRows = new ArrayList<Boolean>();

  // the flag records whether the root component of this GUI is a MainFrame
  isStandalone = SwingUtilities.getRoot(AnnotationDiffGUI.this) instanceof MainFrame;
}
Aggregations