Search in sources :

Example 1 with AnnotationDiffer

use of gate.util.AnnotationDiffer in project gate-core by GateNLP.

The method compareAnnotation of the class CorpusQualityAssurance.

/**
 * Compares the key and response annotations of every document of the
 * corpus and fills the result tables with one row per document, one row
 * per annotation type and the summary rows.
 *
 * <p>Outline of what the method does:
 * <ul>
 * <li>interrupts {@code readSetsTypesFeaturesThread} when it is alive and,
 * at the very end, calls {@code readSetsTypesFeatures} with the progress
 * bar value saved at interruption time;</li>
 * <li>disables the selection controls and actions while comparing and
 * re-enables them afterwards (both through
 * {@code SwingUtilities.invokeLater});</li>
 * <li>for {@code FSCORE_MEASURES}: builds one {@code AnnotationDiffer} per
 * selected type and document, adds a row per document to
 * {@code documentTableModel}, a row per type to
 * {@code annotationTableModel}, then the summary rows; when a selected
 * measure name contains "BDM", {@code OntologyMeasures} are used for the
 * rows instead;</li>
 * <li>for {@code CLASSIFICATION_MEASURES}: builds a
 * {@code ClassificationMeasures} per document that has both key and
 * response annotations, adds a row to {@code document2TableModel} and the
 * confusion matrix rows to {@code confusionTableModel}, then the summary
 * rows and the "Whole corpus" confusion matrix.</li>
 * </ul>
 *
 * <p>NOTE(review): list selections and table models are accessed directly
 * from the calling thread while the progress bar is only touched through
 * {@code invokeLater} - presumably this method runs outside the event
 * dispatch thread; confirm against the caller.
 */
protected void compareAnnotation() {
    // stays at -1 unless the reading thread had to be interrupted
    int progressValuePrevious = -1;
    if (readSetsTypesFeaturesThread != null && readSetsTypesFeaturesThread.isAlive()) {
        // stop the thread that reads the sets, types and features
        progressValuePrevious = progressBar.getValue();
        readSetsTypesFeaturesThread.interrupt();
    }
    // lock the user interface while the comparison is running
    SwingUtilities.invokeLater(new Runnable() {

        @Override
        public void run() {
            // NOTE(review): the maximum is corpus.size() - 1 but the loop
            // below sets values up to corpus.size() - confirm this is intended
            progressBar.setMaximum(corpus.size() - 1);
            progressBar.setString("Compare annotations");
            setList.setEnabled(false);
            setCheck.setEnabled(false);
            typeList.setEnabled(false);
            typeCheck.setEnabled(false);
            featureList.setEnabled(false);
            featureCheck.setEnabled(false);
            optionsButton.setEnabled(false);
            measureTabbedPane.setEnabled(false);
            measureList.setEnabled(false);
            exportToHtmlAction.setEnabled(false);
            reloadCacheAction.setEnabled(false);
        }
    });
    // becomes true when at least one selected measure name contains "BDM"
    boolean useBdm = false;
    if (measuresType == FSCORE_MEASURES) {
        // forget the results of the previous comparison
        differsByDocThenType.clear();
        documentNames.clear();
        for (Object measure : measureList.getSelectedValues()) {
            if (((String) measure).contains("BDM")) {
                useBdm = true;
                break;
            }
        }
    }
    // per document (or per type) measures, merged later for the summary rows
    List<ClassificationMeasures> classificationMeasuresList = new ArrayList<ClassificationMeasures>();
    List<OntologyMeasures> documentOntologyMeasuresList = new ArrayList<OntologyMeasures>();
    List<OntologyMeasures> annotationOntologyMeasuresList = new ArrayList<OntologyMeasures>();
    // for each document
    for (int row = 0; row < corpus.size(); row++) {
        // remembered so that a document loaded only for this comparison
        // can be unloaded again below
        boolean documentWasLoaded = corpus.isDocumentLoaded(row);
        Document document = corpus.get(row);
        documentNames.add(document.getName());
        // both stay empty plain sets when the selected set name does not
        // exist in this document
        Set<Annotation> keys = new HashSet<Annotation>();
        Set<Annotation> responses = new HashSet<Annotation>();
        // get annotations from selected annotation sets
        if (keySetName.equals("[Default set]")) {
            keys = document.getAnnotations();
        } else if (document.getAnnotationSetNames() != null && document.getAnnotationSetNames().contains(keySetName)) {
            keys = document.getAnnotations(keySetName);
        }
        if (responseSetName.equals("[Default set]")) {
            responses = document.getAnnotations();
        } else if (document.getAnnotationSetNames() != null && document.getAnnotationSetNames().contains(responseSetName)) {
            responses = document.getAnnotations(responseSetName);
        }
        if (!documentWasLoaded) {
            // in case of datastore
            // NOTE(review): keys and responses obtained above are still used
            // after the document is unloaded and deleted - confirm that the
            // annotation sets remain usable at that point
            corpus.unloadDocument(document);
            Factory.deleteResource(document);
        }
        // add data to the fscore document table
        if (measuresType == FSCORE_MEASURES) {
            // rebuild the list of types to compare: the selected ones or,
            // when nothing is selected, every type of the list model
            types.clear();
            for (Object type : typeList.getSelectedValues()) {
                types.add((String) type);
            }
            if (typeList.isSelectionEmpty()) {
                for (int i = 0; i < typeList.getModel().getSize(); i++) {
                    types.add((String) typeList.getModel().getElementAt(i));
                }
            }
            // features that the differ is told to treat as significant
            Set<String> featureSet = new HashSet<String>();
            for (Object feature : featureList.getSelectedValues()) {
                featureSet.add((String) feature);
            }
            HashMap<String, AnnotationDiffer> differsByType = new HashMap<String, AnnotationDiffer>();
            AnnotationDiffer differ;
            // remain empty for the whole document when keys/responses is empty
            Set<Annotation> keysIter = new HashSet<Annotation>();
            Set<Annotation> responsesIter = new HashSet<Annotation>();
            for (String type : types) {
                // types cannot be empty inside this loop, so only the
                // emptiness of keys/responses decides; a non-empty set is
                // cast to AnnotationSet to filter it by type
                if (!keys.isEmpty() && !types.isEmpty()) {
                    keysIter = ((AnnotationSet) keys).get(type);
                }
                if (!responses.isEmpty() && !types.isEmpty()) {
                    responsesIter = ((AnnotationSet) responses).get(type);
                }
                differ = new AnnotationDiffer();
                differ.setSignificantFeaturesSet(featureSet);
                // compare
                differ.calculateDiff(keysIter, responsesIter);
                differsByType.put(type, differ);
            }
            differsByDocThenType.add(differsByType);
            // one differ built from all the per type differs of this document
            differ = new AnnotationDiffer(differsByType.values());
            List<String> measuresRow;
            // the row is labelled with the name of the current document,
            // i.e. the last element added to documentNames
            if (useBdm) {
                OntologyMeasures ontologyMeasures = new OntologyMeasures();
                ontologyMeasures.setBdmFile(bdmFileUrl);
                ontologyMeasures.calculateBdm(differsByType.values());
                documentOntologyMeasuresList.add(ontologyMeasures);
                measuresRow = ontologyMeasures.getMeasuresRow(measureList.getSelectedValues(), documentNames.get(documentNames.size() - 1));
            } else {
                measuresRow = differ.getMeasuresRow(measureList.getSelectedValues(), documentNames.get(documentNames.size() - 1));
            }
            documentTableModel.addRow(measuresRow.toArray());
        // add data to the classification document table
        // (documents without key or response annotations are skipped)
        } else if (measuresType == CLASSIFICATION_MEASURES && !keys.isEmpty() && !responses.isEmpty()) {
            ClassificationMeasures classificationMeasures = new ClassificationMeasures();
            classificationMeasures.calculateConfusionMatrix((AnnotationSet) keys, (AnnotationSet) responses, (String) typeList.getSelectedValue(), (String) featureList.getSelectedValue(), verboseOptionCheckBox.isSelected());
            classificationMeasuresList.add(classificationMeasures);
            List<String> measuresRow = classificationMeasures.getMeasuresRow(measure2List.getSelectedValues(), documentNames.get(documentNames.size() - 1));
            document2TableModel.addRow(measuresRow.toArray());
            List<List<String>> matrix = classificationMeasures.getConfusionMatrix(documentNames.get(documentNames.size() - 1));
            for (List<String> matrixRow : matrix) {
                // add blank named columns until the table has at least
                // matrix.size() columns
                while (confusionTableModel.getColumnCount() < matrix.size()) {
                    confusionTableModel.addColumn(" ");
                }
                confusionTableModel.addRow(matrixRow.toArray());
            }
        }
        // number of documents processed so far, shown in the progress bar
        final int progressValue = row + 1;
        SwingUtilities.invokeLater(new Runnable() {

            @Override
            public void run() {
                progressBar.setValue(progressValue);
            }
        });
    }
    // add data to the fscore annotation table
    if (measuresType == FSCORE_MEASURES) {
        // types still holds the list built during the last loop iteration
        for (String type : types) {
            // gather the differ of this type from every document
            ArrayList<AnnotationDiffer> differs = new ArrayList<AnnotationDiffer>();
            for (HashMap<String, AnnotationDiffer> differsByType : differsByDocThenType) {
                differs.add(differsByType.get(type));
            }
            List<String> measuresRow;
            if (useBdm) {
                OntologyMeasures ontologyMeasures = new OntologyMeasures();
                ontologyMeasures.setBdmFile(bdmFileUrl);
                ontologyMeasures.calculateBdm(differs);
                annotationOntologyMeasuresList.add(ontologyMeasures);
                measuresRow = ontologyMeasures.getMeasuresRow(measureList.getSelectedValues(), type);
            } else {
                AnnotationDiffer differ = new AnnotationDiffer(differs);
                measuresRow = differ.getMeasuresRow(measureList.getSelectedValues(), type);
            }
            annotationTableModel.addRow(measuresRow.toArray());
        }
    }
    // add summary rows to the fscore tables
    // (5 and 3 below are passed as the columnGroupSize argument of
    // printSummary; the summary rows are inserted after the last row)
    if (measuresType == FSCORE_MEASURES) {
        if (useBdm) {
            OntologyMeasures ontologyMeasures = new OntologyMeasures(documentOntologyMeasuresList);
            printSummary(ontologyMeasures, documentTableModel, 5, documentTableModel.getRowCount(), measureList.getSelectedValues());
            ontologyMeasures = new OntologyMeasures(annotationOntologyMeasuresList);
            printSummary(ontologyMeasures, annotationTableModel, 5, annotationTableModel.getRowCount(), measureList.getSelectedValues());
        } else {
            // the same differ, built from every document and type, is used
            // for the summary of both tables
            List<AnnotationDiffer> differs = new ArrayList<AnnotationDiffer>();
            for (Map<String, AnnotationDiffer> differsByType : differsByDocThenType) {
                differs.addAll(differsByType.values());
            }
            AnnotationDiffer differ = new AnnotationDiffer(differs);
            printSummary(differ, documentTableModel, 5, documentTableModel.getRowCount(), measureList.getSelectedValues());
            printSummary(differ, annotationTableModel, 5, annotationTableModel.getRowCount(), measureList.getSelectedValues());
        }
    // add summary rows to the classification tables
    } else if (measuresType == CLASSIFICATION_MEASURES) {
        ClassificationMeasures classificationMeasures = new ClassificationMeasures(classificationMeasuresList);
        printSummary(classificationMeasures, document2TableModel, 3, document2TableModel.getRowCount(), measure2List.getSelectedValues());
        List<List<String>> matrix = classificationMeasures.getConfusionMatrix("Whole corpus");
        // the whole corpus matrix goes before the per document matrices,
        // starting at the first row of the table
        int insertionRow = 0;
        for (List<String> row : matrix) {
            while (confusionTableModel.getColumnCount() < matrix.size()) {
                confusionTableModel.addColumn(" ");
            }
            confusionTableModel.insertRow(insertionRow++, row.toArray());
        }
    }
    // reset the progress bar and unlock the user interface
    SwingUtilities.invokeLater(new Runnable() {

        @Override
        public void run() {
            progressBar.setValue(progressBar.getMinimum());
            progressBar.setString("");
            setList.setEnabled(true);
            setCheck.setEnabled(true);
            typeList.setEnabled(true);
            typeCheck.setEnabled(true);
            featureList.setEnabled(true);
            featureCheck.setEnabled(true);
            optionsButton.setEnabled(true);
            measureTabbedPane.setEnabled(true);
            measureList.setEnabled(true);
            exportToHtmlAction.setEnabled(true);
            reloadCacheAction.setEnabled(true);
        }
    });
    if (progressValuePrevious > -1) {
        // restart the thread where it was interrupted
        readSetsTypesFeatures(progressValuePrevious);
    }
}
Also used : LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) AnnotationSet(gate.AnnotationSet) Document(gate.Document) ArrayList(java.util.ArrayList) List(java.util.List) JList(javax.swing.JList) AnnotationDiffer(gate.util.AnnotationDiffer) OntologyMeasures(gate.util.OntologyMeasures) HashSet(java.util.HashSet) Annotation(gate.Annotation) ClassificationMeasures(gate.util.ClassificationMeasures)

Example 2 with AnnotationDiffer

use of gate.util.AnnotationDiffer in project gate-core by GateNLP.

The method printSummary of the class CorpusQualityAssurance.

/**
 * Inserts a "Macro summary" row at {@code insertionRow} and a
 * "Micro summary" row at {@code insertionRow + 1} of the given table.
 *
 * <p>Macro row: for every column from {@code columnGroupSize} on, the
 * cells of all rows are parsed as floats and summed, then divided by the
 * row count. Cells that do not parse add nothing to the sum but still
 * count in the divisor. Columns before {@code columnGroupSize} get an
 * empty cell.
 *
 * <p>Micro row: columns 1 to {@code columnGroupSize - 1} hold the integer
 * sum of the cells of all rows but the last one (presumably to skip the
 * macro row just inserted, which holds when {@code insertionRow} is the
 * previous row count). The remaining cells are recomputed from
 * {@code measureObject} for each entry of {@code measures}.
 *
 * @param measureObject an {@code AnnotationDiffer}, a
 *        {@code ClassificationMeasures} or an {@code OntologyMeasures}
 *        holding the merged results
 * @param tableModel table that receives the two summary rows
 * @param columnGroupSize index of the first column holding a measure
 * @param insertionRow row index where the macro summary is inserted
 * @param measures names (as {@code String}) of the measures to output;
 *        a name matching none of the known measures adds no cell
 */
protected void printSummary(Object measureObject, DefaultTableModel tableModel, int columnGroupSize, int insertionRow, Object[] measures) {
    // at most one of these three references ends up non-null
    AnnotationDiffer diffMeasures = null;
    ClassificationMeasures classification = null;
    OntologyMeasures ontology = null;
    if (measureObject instanceof AnnotationDiffer) {
        diffMeasures = (AnnotationDiffer) measureObject;
    } else if (measureObject instanceof ClassificationMeasures) {
        classification = (ClassificationMeasures) measureObject;
    } else if (measureObject instanceof OntologyMeasures) {
        ontology = (OntologyMeasures) measureObject;
    }

    // every number is written with exactly four decimals
    NumberFormat formatter = NumberFormat.getInstance(Locale.ENGLISH);
    formatter.setRoundingMode(RoundingMode.HALF_UP);
    formatter.setMinimumFractionDigits(4);
    formatter.setMaximumFractionDigits(4);

    // macro summary: average of each measure column over the rows
    List<Object> macroCells = new ArrayList<Object>();
    macroCells.add("Macro summary");
    for (int column = 1; column < tableModel.getColumnCount(); column++) {
        if (column < columnGroupSize) {
            // count columns are left blank in the macro row
            macroCells.add("");
            continue;
        }
        float total = 0;
        for (int line = 0; line < tableModel.getRowCount(); line++) {
            try {
                total += Float.parseFloat((String) tableModel.getValueAt(line, column));
            } catch (NumberFormatException ignored) {
                // a cell that is not a number adds nothing to the total
            }
        }
        macroCells.add(formatter.format(total / tableModel.getRowCount()));
    }
    tableModel.insertRow(insertionRow, macroCells.toArray());

    // micro summary: add up the counts, then recalculate the measures
    // as if the whole corpus was a single document
    List<Object> microCells = new ArrayList<Object>();
    microCells.add("Micro summary");
    for (int column = 1; column < columnGroupSize; column++) {
        int total = 0;
        // the last row of the table is left out of the sum
        for (int line = 0; line < tableModel.getRowCount() - 1; line++) {
            try {
                total += Integer.parseInt((String) tableModel.getValueAt(line, column));
            } catch (NumberFormatException ignored) {
                // a cell that is not an integer adds nothing to the total
            }
        }
        microCells.add(Integer.toString(total));
    }

    if (measureObject instanceof OntologyMeasures) {
        // the measures without BDM come from a differ built out of the
        // differs held by the ontology measures
        List<AnnotationDiffer> perTypeDiffers = new ArrayList<AnnotationDiffer>(ontology.getDifferByTypeMap().values());
        diffMeasures = new AnnotationDiffer(perTypeDiffers);
    }

    for (Object selected : measures) {
        String name = (String) selected;
        // beta is read between the first character and the first '-' of
        // the name and defaults to 1 when the name has no '-'
        int dash = name.indexOf('-');
        double beta;
        if (dash == -1) {
            beta = 1;
        } else {
            beta = Double.parseDouble(name.substring(1, dash));
        }
        if (name.endsWith("strict")) {
            microCells.add(formatter.format(diffMeasures.getPrecisionStrict()));
            microCells.add(formatter.format(diffMeasures.getRecallStrict()));
            microCells.add(formatter.format(diffMeasures.getFMeasureStrict(beta)));
        } else if (name.endsWith("strict BDM")) {
            microCells.add(formatter.format(ontology.getPrecisionStrictBdm()));
            microCells.add(formatter.format(ontology.getRecallStrictBdm()));
            microCells.add(formatter.format(ontology.getFMeasureStrictBdm(beta)));
        } else if (name.endsWith("lenient")) {
            microCells.add(formatter.format(diffMeasures.getPrecisionLenient()));
            microCells.add(formatter.format(diffMeasures.getRecallLenient()));
            microCells.add(formatter.format(diffMeasures.getFMeasureLenient(beta)));
        } else if (name.endsWith("lenient BDM")) {
            microCells.add(formatter.format(ontology.getPrecisionLenientBdm()));
            microCells.add(formatter.format(ontology.getRecallLenientBdm()));
            microCells.add(formatter.format(ontology.getFMeasureLenientBdm(beta)));
        } else if (name.endsWith("average")) {
            microCells.add(formatter.format(diffMeasures.getPrecisionAverage()));
            microCells.add(formatter.format(diffMeasures.getRecallAverage()));
            microCells.add(formatter.format(diffMeasures.getFMeasureAverage(beta)));
        } else if (name.endsWith("average BDM")) {
            microCells.add(formatter.format(ontology.getPrecisionAverageBdm()));
            microCells.add(formatter.format(ontology.getRecallAverageBdm()));
            microCells.add(formatter.format(ontology.getFMeasureAverageBdm(beta)));
        } else if (name.equals("Observed agreement")) {
            microCells.add(formatter.format(classification.getObservedAgreement()));
        } else if (name.equals("Cohen's Kappa")) {
            // an undefined kappa is shown as an empty cell
            float kappa = classification.getKappaCohen();
            if (Float.isNaN(kappa)) {
                microCells.add("");
            } else {
                microCells.add(formatter.format(kappa));
            }
        } else if (name.equals("Pi's Kappa")) {
            // an undefined kappa is shown as an empty cell
            float kappa = classification.getKappaPi();
            if (Float.isNaN(kappa)) {
                microCells.add("");
            } else {
                microCells.add(formatter.format(kappa));
            }
        }
    }
    tableModel.insertRow(insertionRow + 1, microCells.toArray());
}
Also used : ArrayList(java.util.ArrayList) ClassificationMeasures(gate.util.ClassificationMeasures) AnnotationDiffer(gate.util.AnnotationDiffer) OntologyMeasures(gate.util.OntologyMeasures) NumberFormat(java.text.NumberFormat)

Example 3 with AnnotationDiffer

use of gate.util.AnnotationDiffer in project gate-core by GateNLP.

The method initLocalData of the class AnnotationDiffGUI.

/**
 * Initialises the data fields of this component: clears both document
 * references, creates a new differ with empty pairing, copy flag and
 * significant feature collections, and records whether the root
 * component of this GUI is a {@code MainFrame}.
 */
protected void initLocalData() {
    // clear the key and response document references
    keyDoc = null;
    resDoc = null;

    // new differ and empty collections
    differ = new AnnotationDiffer();
    significantFeatures = new HashSet<String>();
    pairings = new ArrayList<AnnotationDiffer.Pairing>();
    keyCopyValueRows = new ArrayList<Boolean>();
    resCopyValueRows = new ArrayList<Boolean>();

    // true when the root component of this GUI is a MainFrame
    isStandalone = SwingUtilities.getRoot(AnnotationDiffGUI.this) instanceof MainFrame;
}
Also used : AnnotationDiffer(gate.util.AnnotationDiffer) Component(java.awt.Component) JComponent(javax.swing.JComponent)

Aggregations

AnnotationDiffer (gate.util.AnnotationDiffer): 3, ClassificationMeasures (gate.util.ClassificationMeasures): 2, OntologyMeasures (gate.util.OntologyMeasures): 2, ArrayList (java.util.ArrayList): 2, Annotation (gate.Annotation): 1, AnnotationSet (gate.AnnotationSet): 1, Document (gate.Document): 1, Component (java.awt.Component): 1, NumberFormat (java.text.NumberFormat): 1, HashMap (java.util.HashMap): 1, HashSet (java.util.HashSet): 1, LinkedHashMap (java.util.LinkedHashMap): 1, List (java.util.List): 1, JComponent (javax.swing.JComponent): 1, JList (javax.swing.JList): 1