Search in sources :

Example 1 with ClassificationMeasures

use of gate.util.ClassificationMeasures in project gate-core by GateNLP.

The method compareAnnotation of the class CorpusQualityAssurance.

/**
 * Compares the key and response annotation sets of every document in
 * {@code corpus} and fills the result tables.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>If {@code readSetsTypesFeaturesThread} is alive, interrupt it and
 *       remember the progress bar value so it can be restarted at the end.</li>
 *   <li>Disable the selection controls and actions (posted with
 *       {@code SwingUtilities.invokeLater}).</li>
 *   <li>For each document, depending on {@code measuresType}: build one
 *       {@code AnnotationDiffer} per selected type and add a row to
 *       {@code documentTableModel}, or compute a
 *       {@code ClassificationMeasures} and add rows to
 *       {@code document2TableModel} and {@code confusionTableModel}.</li>
 *   <li>For F-score measures, add one row per annotation type to
 *       {@code annotationTableModel}.</li>
 *   <li>Add the summary rows via {@code printSummary}.</li>
 *   <li>Re-enable the controls and, if a reader thread was interrupted, call
 *       {@code readSetsTypesFeatures} with the remembered progress value.</li>
 * </ol>
 *
 * <p>Only the progress bar and the enabling/disabling of controls are posted
 * with {@code invokeLater}; the list selections are read and the table models
 * are modified directly on the calling thread.
 * NOTE(review): presumably this method is run on a worker thread - confirm
 * against the caller.
 */
protected void compareAnnotation() {
    // -1 means "no reader thread was interrupted"; checked again at the end
    int progressValuePrevious = -1;
    if (readSetsTypesFeaturesThread != null && readSetsTypesFeaturesThread.isAlive()) {
        // stop the thread that reads the sets, types and features
        progressValuePrevious = progressBar.getValue();
        readSetsTypesFeaturesThread.interrupt();
    }
    // lock the user interface while the comparison is running
    SwingUtilities.invokeLater(new Runnable() {

        @Override
        public void run() {
            // NOTE(review): the per-document updates below post values up to
            // corpus.size(), one more than this maximum - confirm intended
            progressBar.setMaximum(corpus.size() - 1);
            progressBar.setString("Compare annotations");
            setList.setEnabled(false);
            setCheck.setEnabled(false);
            typeList.setEnabled(false);
            typeCheck.setEnabled(false);
            featureList.setEnabled(false);
            featureCheck.setEnabled(false);
            optionsButton.setEnabled(false);
            measureTabbedPane.setEnabled(false);
            measureList.setEnabled(false);
            exportToHtmlAction.setEnabled(false);
            reloadCacheAction.setEnabled(false);
        }
    });
    // true when at least one selected F-score measure name contains "BDM"
    boolean useBdm = false;
    if (measuresType == FSCORE_MEASURES) {
        // reset the state kept from a previous F-score comparison
        differsByDocThenType.clear();
        documentNames.clear();
        for (Object measure : measureList.getSelectedValues()) {
            if (((String) measure).contains("BDM")) {
                useBdm = true;
                break;
            }
        }
    }
    // per-document results, aggregated into the summary rows after the loop
    List<ClassificationMeasures> classificationMeasuresList = new ArrayList<ClassificationMeasures>();
    List<OntologyMeasures> documentOntologyMeasuresList = new ArrayList<OntologyMeasures>();
    List<OntologyMeasures> annotationOntologyMeasuresList = new ArrayList<OntologyMeasures>();
    // for each document
    for (int row = 0; row < corpus.size(); row++) {
        // remember the loaded state before corpus.get(row) is called
        boolean documentWasLoaded = corpus.isDocumentLoaded(row);
        Document document = corpus.get(row);
        documentNames.add(document.getName());
        // empty HashSets are the fallback when the named set is not in the
        // document; the casts to AnnotationSet below are only reached when
        // the set is non-empty, i.e. when it was replaced by a document set
        Set<Annotation> keys = new HashSet<Annotation>();
        Set<Annotation> responses = new HashSet<Annotation>();
        // get annotations from selected annotation sets
        if (keySetName.equals("[Default set]")) {
            keys = document.getAnnotations();
        } else if (document.getAnnotationSetNames() != null && document.getAnnotationSetNames().contains(keySetName)) {
            keys = document.getAnnotations(keySetName);
        }
        if (responseSetName.equals("[Default set]")) {
            responses = document.getAnnotations();
        } else if (document.getAnnotationSetNames() != null && document.getAnnotationSetNames().contains(responseSetName)) {
            responses = document.getAnnotations(responseSetName);
        }
        if (!documentWasLoaded) {
            // in case of datastore
            // NOTE(review): keys and responses obtained above are still used
            // after the document is unloaded and deleted - confirm they
            // remain valid
            corpus.unloadDocument(document);
            Factory.deleteResource(document);
        }
        // add data to the fscore document table
        if (measuresType == FSCORE_MEASURES) {
            // rebuild the list of types to compare: the selected ones, or
            // every type in the list model when nothing is selected
            types.clear();
            for (Object type : typeList.getSelectedValues()) {
                types.add((String) type);
            }
            if (typeList.isSelectionEmpty()) {
                for (int i = 0; i < typeList.getModel().getSize(); i++) {
                    types.add((String) typeList.getModel().getElementAt(i));
                }
            }
            // features that are passed to every differ as significant
            Set<String> featureSet = new HashSet<String>();
            for (Object feature : featureList.getSelectedValues()) {
                featureSet.add((String) feature);
            }
            HashMap<String, AnnotationDiffer> differsByType = new HashMap<String, AnnotationDiffer>();
            AnnotationDiffer differ;
            // stay empty for the whole loop when keys/responses is empty
            Set<Annotation> keysIter = new HashSet<Annotation>();
            Set<Annotation> responsesIter = new HashSet<Annotation>();
            for (String type : types) {
                // !types.isEmpty() is always true here, inside the loop over types
                if (!keys.isEmpty() && !types.isEmpty()) {
                    keysIter = ((AnnotationSet) keys).get(type);
                }
                if (!responses.isEmpty() && !types.isEmpty()) {
                    responsesIter = ((AnnotationSet) responses).get(type);
                }
                differ = new AnnotationDiffer();
                differ.setSignificantFeaturesSet(featureSet);
                // compare
                differ.calculateDiff(keysIter, responsesIter);
                differsByType.put(type, differ);
            }
            // kept for the per-type rows and the summary rows built after the loop
            differsByDocThenType.add(differsByType);
            // combine the per-type differs of this document into one
            differ = new AnnotationDiffer(differsByType.values());
            List<String> measuresRow;
            // the row label is the name of the current document (last one added)
            if (useBdm) {
                OntologyMeasures ontologyMeasures = new OntologyMeasures();
                ontologyMeasures.setBdmFile(bdmFileUrl);
                ontologyMeasures.calculateBdm(differsByType.values());
                documentOntologyMeasuresList.add(ontologyMeasures);
                measuresRow = ontologyMeasures.getMeasuresRow(measureList.getSelectedValues(), documentNames.get(documentNames.size() - 1));
            } else {
                measuresRow = differ.getMeasuresRow(measureList.getSelectedValues(), documentNames.get(documentNames.size() - 1));
            }
            documentTableModel.addRow(measuresRow.toArray());
        // add data to the classification document table
        // (documents whose key or response set is empty get no row)
        } else if (measuresType == CLASSIFICATION_MEASURES && !keys.isEmpty() && !responses.isEmpty()) {
            ClassificationMeasures classificationMeasures = new ClassificationMeasures();
            classificationMeasures.calculateConfusionMatrix((AnnotationSet) keys, (AnnotationSet) responses, (String) typeList.getSelectedValue(), (String) featureList.getSelectedValue(), verboseOptionCheckBox.isSelected());
            classificationMeasuresList.add(classificationMeasures);
            List<String> measuresRow = classificationMeasures.getMeasuresRow(measure2List.getSelectedValues(), documentNames.get(documentNames.size() - 1));
            document2TableModel.addRow(measuresRow.toArray());
            // append this document's confusion matrix to the confusion table
            List<List<String>> matrix = classificationMeasures.getConfusionMatrix(documentNames.get(documentNames.size() - 1));
            for (List<String> matrixRow : matrix) {
                // add blank-titled columns until the table has at least
                // matrix.size() columns
                while (confusionTableModel.getColumnCount() < matrix.size()) {
                    confusionTableModel.addColumn(" ");
                }
                confusionTableModel.addRow(matrixRow.toArray());
            }
        }
        // report one more processed document to the progress bar
        final int progressValue = row + 1;
        SwingUtilities.invokeLater(new Runnable() {

            @Override
            public void run() {
                progressBar.setValue(progressValue);
            }
        });
    }
    // add data to the fscore annotation table
    if (measuresType == FSCORE_MEASURES) {
        // types holds what the last document iteration put into it
        // NOTE(review): if the corpus is empty, types is not rebuilt by this
        // call - confirm that is acceptable
        for (String type : types) {
            // collect the differ of this type from every document
            ArrayList<AnnotationDiffer> differs = new ArrayList<AnnotationDiffer>();
            for (HashMap<String, AnnotationDiffer> differsByType : differsByDocThenType) {
                differs.add(differsByType.get(type));
            }
            List<String> measuresRow;
            if (useBdm) {
                OntologyMeasures ontologyMeasures = new OntologyMeasures();
                ontologyMeasures.setBdmFile(bdmFileUrl);
                ontologyMeasures.calculateBdm(differs);
                annotationOntologyMeasuresList.add(ontologyMeasures);
                measuresRow = ontologyMeasures.getMeasuresRow(measureList.getSelectedValues(), type);
            } else {
                AnnotationDiffer differ = new AnnotationDiffer(differs);
                measuresRow = differ.getMeasuresRow(measureList.getSelectedValues(), type);
            }
            annotationTableModel.addRow(measuresRow.toArray());
        }
    }
    // add summary rows to the fscore tables
    // (5 and 3 below are the columnGroupSize argument of printSummary: columns
    // before that index are summed as counts, the others hold the measures)
    if (measuresType == FSCORE_MEASURES) {
        if (useBdm) {
            OntologyMeasures ontologyMeasures = new OntologyMeasures(documentOntologyMeasuresList);
            printSummary(ontologyMeasures, documentTableModel, 5, documentTableModel.getRowCount(), measureList.getSelectedValues());
            ontologyMeasures = new OntologyMeasures(annotationOntologyMeasuresList);
            printSummary(ontologyMeasures, annotationTableModel, 5, annotationTableModel.getRowCount(), measureList.getSelectedValues());
        } else {
            // one differ over every (document, type) differ; the same object
            // is used for the summary of both tables
            List<AnnotationDiffer> differs = new ArrayList<AnnotationDiffer>();
            for (Map<String, AnnotationDiffer> differsByType : differsByDocThenType) {
                differs.addAll(differsByType.values());
            }
            AnnotationDiffer differ = new AnnotationDiffer(differs);
            printSummary(differ, documentTableModel, 5, documentTableModel.getRowCount(), measureList.getSelectedValues());
            printSummary(differ, annotationTableModel, 5, annotationTableModel.getRowCount(), measureList.getSelectedValues());
        }
    // add summary rows to the classification tables
    } else if (measuresType == CLASSIFICATION_MEASURES) {
        ClassificationMeasures classificationMeasures = new ClassificationMeasures(classificationMeasuresList);
        printSummary(classificationMeasures, document2TableModel, 3, document2TableModel.getRowCount(), measure2List.getSelectedValues());
        // the whole-corpus confusion matrix is inserted at the top of the
        // confusion table, above the per-document matrices
        List<List<String>> matrix = classificationMeasures.getConfusionMatrix("Whole corpus");
        int insertionRow = 0;
        for (List<String> row : matrix) {
            while (confusionTableModel.getColumnCount() < matrix.size()) {
                confusionTableModel.addColumn(" ");
            }
            confusionTableModel.insertRow(insertionRow++, row.toArray());
        }
    }
    // reset the progress bar and unlock the user interface
    SwingUtilities.invokeLater(new Runnable() {

        @Override
        public void run() {
            progressBar.setValue(progressBar.getMinimum());
            progressBar.setString("");
            setList.setEnabled(true);
            setCheck.setEnabled(true);
            typeList.setEnabled(true);
            typeCheck.setEnabled(true);
            featureList.setEnabled(true);
            featureCheck.setEnabled(true);
            optionsButton.setEnabled(true);
            measureTabbedPane.setEnabled(true);
            measureList.setEnabled(true);
            exportToHtmlAction.setEnabled(true);
            reloadCacheAction.setEnabled(true);
        }
    });
    if (progressValuePrevious > -1) {
        // restart the thread where it was interrupted
        readSetsTypesFeatures(progressValuePrevious);
    }
}
Also used : LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) AnnotationSet(gate.AnnotationSet) Document(gate.Document) ArrayList(java.util.ArrayList) List(java.util.List) JList(javax.swing.JList) AnnotationDiffer(gate.util.AnnotationDiffer) OntologyMeasures(gate.util.OntologyMeasures) HashSet(java.util.HashSet) Annotation(gate.Annotation) ClassificationMeasures(gate.util.ClassificationMeasures)

Example 2 with ClassificationMeasures

use of gate.util.ClassificationMeasures in project gate-core by GateNLP.

The method printSummary of the class CorpusQualityAssurance.

/**
 * Inserts a "Macro summary" row at {@code insertionRow} and a
 * "Micro summary" row at {@code insertionRow + 1} of the given table.
 *
 * <p>The macro row averages each measure column over the rows currently in
 * the table. Cells that are {@code null} or not parseable as a number add
 * nothing to the sum but are still counted in the divisor. The micro row
 * sums the count columns and takes the measure values from
 * {@code measureObject}, i.e. from the aggregated counts.
 *
 * @param measureObject an {@code AnnotationDiffer}, a
 *        {@code ClassificationMeasures} or an {@code OntologyMeasures}
 *        holding the aggregated results; it must match the kind of names
 *        in {@code measures}
 * @param tableModel table to read the existing rows from and to insert the
 *        two summary rows into; its cells are expected to be strings
 * @param columnGroupSize index of the first measure column; columns
 *        {@code 1 .. columnGroupSize - 1} are treated as integer counts
 * @param insertionRow row index of the macro summary row; the callers
 *        visible in this class pass the current row count
 * @param measures names of the selected measures, as {@code String}s
 */
protected void printSummary(Object measureObject, DefaultTableModel tableModel, int columnGroupSize, int insertionRow, Object[] measures) {
    AnnotationDiffer differ = null;
    ClassificationMeasures classificationMeasures = null;
    OntologyMeasures ontologyMeasures = null;
    if (measureObject instanceof AnnotationDiffer) {
        differ = (AnnotationDiffer) measureObject;
    } else if (measureObject instanceof ClassificationMeasures) {
        classificationMeasures = (ClassificationMeasures) measureObject;
    } else if (measureObject instanceof OntologyMeasures) {
        ontologyMeasures = (OntologyMeasures) measureObject;
    }
    // fixed four fraction digits, English locale
    NumberFormat f = NumberFormat.getInstance(Locale.ENGLISH);
    f.setMaximumFractionDigits(4);
    f.setMinimumFractionDigits(4);
    f.setRoundingMode(RoundingMode.HALF_UP);
    List<Object> values = new ArrayList<Object>();
    // average measures by document
    values.add("Macro summary");
    for (int col = 1; col < tableModel.getColumnCount(); col++) {
        if (col < columnGroupSize) {
            // count columns are left blank in the macro row
            values.add("");
        } else {
            float sumF = 0;
            for (int row = 0; row < tableModel.getRowCount(); row++) {
                Object cell = tableModel.getValueAt(row, col);
                if (cell == null) {
                    // Float.parseFloat(null) throws NullPointerException,
                    // which the catch below would not handle; treat an
                    // unset cell like any other non-numeric cell
                    continue;
                }
                try {
                    sumF += Float.parseFloat((String) cell);
                } catch (NumberFormatException ignored) {
                    // non-numeric cell (e.g. an empty string): adds nothing
                }
            }
            values.add(f.format(sumF / tableModel.getRowCount()));
        }
    }
    tableModel.insertRow(insertionRow, values.toArray());
    // sum counts and recalculate measures like the corpus is one document
    values.clear();
    values.add("Micro summary");
    for (int col = 1; col < columnGroupSize; col++) {
        int sum = 0;
        // the last row is left out: it is the macro summary row just
        // inserted when insertionRow was the previous row count
        for (int row = 0; row < tableModel.getRowCount() - 1; row++) {
            try {
                // Integer.parseInt(null) throws NumberFormatException, so
                // unset cells are already skipped here
                sum += Integer.parseInt((String) tableModel.getValueAt(row, col));
            } catch (NumberFormatException ignored) {
                // non-numeric cell: adds nothing
            }
        }
        values.add(Integer.toString(sum));
    }
    if (ontologyMeasures != null) {
        // the non-BDM measures of an OntologyMeasures are computed from a
        // differ built over its per-type differs
        List<AnnotationDiffer> differs = new ArrayList<AnnotationDiffer>(ontologyMeasures.getDifferByTypeMap().values());
        differ = new AnnotationDiffer(differs);
    }
    for (Object object : measures) {
        String measure = (String) object;
        // beta is read from the characters between the first one and the
        // first '-' of the measure name; names without '-' use beta = 1
        int index = measure.indexOf('-');
        double beta = (index == -1) ? 1 : Double.parseDouble(measure.substring(1, index));
        if (measure.endsWith("strict")) {
            values.add(f.format(differ.getPrecisionStrict()));
            values.add(f.format(differ.getRecallStrict()));
            values.add(f.format(differ.getFMeasureStrict(beta)));
        } else if (measure.endsWith("strict BDM")) {
            values.add(f.format(ontologyMeasures.getPrecisionStrictBdm()));
            values.add(f.format(ontologyMeasures.getRecallStrictBdm()));
            values.add(f.format(ontologyMeasures.getFMeasureStrictBdm(beta)));
        } else if (measure.endsWith("lenient")) {
            values.add(f.format(differ.getPrecisionLenient()));
            values.add(f.format(differ.getRecallLenient()));
            values.add(f.format(differ.getFMeasureLenient(beta)));
        } else if (measure.endsWith("lenient BDM")) {
            values.add(f.format(ontologyMeasures.getPrecisionLenientBdm()));
            values.add(f.format(ontologyMeasures.getRecallLenientBdm()));
            values.add(f.format(ontologyMeasures.getFMeasureLenientBdm(beta)));
        } else if (measure.endsWith("average")) {
            values.add(f.format(differ.getPrecisionAverage()));
            values.add(f.format(differ.getRecallAverage()));
            values.add(f.format(differ.getFMeasureAverage(beta)));
        } else if (measure.endsWith("average BDM")) {
            values.add(f.format(ontologyMeasures.getPrecisionAverageBdm()));
            values.add(f.format(ontologyMeasures.getRecallAverageBdm()));
            values.add(f.format(ontologyMeasures.getFMeasureAverageBdm(beta)));
        } else if (measure.equals("Observed agreement")) {
            values.add(f.format(classificationMeasures.getObservedAgreement()));
        } else if (measure.equals("Cohen's Kappa")) {
            // an undefined kappa is shown as an empty cell
            float result = classificationMeasures.getKappaCohen();
            values.add(Float.isNaN(result) ? "" : f.format(result));
        } else if (measure.equals("Pi's Kappa")) {
            float result = classificationMeasures.getKappaPi();
            values.add(Float.isNaN(result) ? "" : f.format(result));
        }
    }
    tableModel.insertRow(insertionRow + 1, values.toArray());
}
Also used : ArrayList(java.util.ArrayList) ClassificationMeasures(gate.util.ClassificationMeasures) AnnotationDiffer(gate.util.AnnotationDiffer) OntologyMeasures(gate.util.OntologyMeasures) NumberFormat(java.text.NumberFormat)

Aggregations

AnnotationDiffer (gate.util.AnnotationDiffer): 2, ClassificationMeasures (gate.util.ClassificationMeasures): 2, OntologyMeasures (gate.util.OntologyMeasures): 2, ArrayList (java.util.ArrayList): 2, Annotation (gate.Annotation): 1, AnnotationSet (gate.AnnotationSet): 1, Document (gate.Document): 1, NumberFormat (java.text.NumberFormat): 1, HashMap (java.util.HashMap): 1, HashSet (java.util.HashSet): 1, LinkedHashMap (java.util.LinkedHashMap): 1, List (java.util.List): 1, JList (javax.swing.JList): 1