Search in sources :

Example 6 with JolicielException

use of com.joliciel.talismane.utils.JolicielException in project talismane by joliciel-informatique.

the class ModelTrainerFactory method constructTrainer.

/**
 * Builds the classification model trainer that matches the machine learning
 * algorithm named in the supplied configuration.<br>
 * <br>
 * The configuration is assumed to be a local one, i.e. its root is equivalent
 * to the talismane.machine-learning key in reference.conf. It is validated
 * against the "talismane.machine-learning.generic" section of the default
 * reference configuration before anything else is read from it.
 *
 * @param config
 *            local machine-learning configuration; its "algorithm" entry
 *            must name a {@code MachineLearningAlgorithm} constant
 * @return a new trainer on which {@code setParameters(config)} has already
 *         been called
 * @throws JolicielException
 *             if the configured algorithm has no trainer in this factory
 */
public ClassificationModelTrainer constructTrainer(Config config) {
    config.checkValid(ConfigFactory.defaultReference().getConfig("talismane.machine-learning.generic"));

    String algorithmName = config.getString("algorithm");
    MachineLearningAlgorithm algorithm = MachineLearningAlgorithm.valueOf(algorithmName);

    final ClassificationModelTrainer trainer;
    if (algorithm == MachineLearningAlgorithm.MaxEnt) {
        trainer = new MaxentModelTrainer();
    } else if (algorithm == MachineLearningAlgorithm.LinearSVM || algorithm == MachineLearningAlgorithm.LinearSVMOneVsRest) {
        // Both linear SVM variants are served by the same trainer class.
        trainer = new LinearSVMModelTrainer();
    } else if (algorithm == MachineLearningAlgorithm.Perceptron) {
        trainer = new PerceptronClassificationModelTrainer();
    } else {
        throw new JolicielException("Machine learning algorithm not yet supported: " + algorithm);
    }

    trainer.setParameters(config);
    return trainer;
}
Also used : LinearSVMModelTrainer(com.joliciel.talismane.machineLearning.linearsvm.LinearSVMModelTrainer) JolicielException(com.joliciel.talismane.utils.JolicielException) PerceptronClassificationModelTrainer(com.joliciel.talismane.machineLearning.perceptron.PerceptronClassificationModelTrainer) PerceptronClassificationModelTrainer(com.joliciel.talismane.machineLearning.perceptron.PerceptronClassificationModelTrainer) MaxentModelTrainer(com.joliciel.talismane.machineLearning.maxent.MaxentModelTrainer)

Example 7 with JolicielException

use of com.joliciel.talismane.utils.JolicielException in project talismane by joliciel-informatique.

the class ExternalResourceFinder method addExternalResource.

/**
 * Add external resources located in a scanner from a particular filename.
 * <p>
 * The first line read from the scanner must start with "Type: " and name the
 * resource type: "KeyValue" or "KeyMultiValue". The scanner, positioned after
 * that header line, is then handed to the matching resource constructor, and
 * the resulting resource is registered through the single-argument
 * {@code addExternalResource} overload.
 *
 * @param fileName
 *            name of the file the scanner reads from; used for logging, in
 *            error messages, and passed on to the resource
 * @param scanner
 *            scanner positioned at the very start of the resource content
 * @throws JolicielException
 *             if the content is empty, if the first line does not start
 *             with "Type: ", or if the type is not one of the supported
 *             values
 */
public void addExternalResource(String fileName, Scanner scanner) {
    LOG.debug("Reading " + fileName);

    // An empty file has no header line at all: report it like any other
    // missing header instead of letting Scanner.nextLine() fail with a
    // NoSuchElementException that does not mention the file.
    if (!scanner.hasNextLine())
        throw new JolicielException("In file " + fileName + ", expected line starting with \"Type: \"");

    String typeLine = scanner.nextLine();
    if (!typeLine.startsWith("Type: "))
        throw new JolicielException("In file " + fileName + ", expected line starting with \"Type: \"");

    String type = typeLine.substring("Type: ".length());
    if ("KeyValue".equals(type)) {
        TextFileResource textFileResource = new TextFileResource(fileName, scanner);
        this.addExternalResource(textFileResource);
    } else if ("KeyMultiValue".equals(type)) {
        TextFileMultivaluedResource resource = new TextFileMultivaluedResource(fileName, scanner);
        this.addExternalResource(resource);
    } else {
        throw new JolicielException("Unexpected type in file: " + fileName + ": " + type);
    }
}
Also used : JolicielException(com.joliciel.talismane.utils.JolicielException)

Example 8 with JolicielException

use of com.joliciel.talismane.utils.JolicielException in project talismane by joliciel-informatique.

the class FScoreCalculator method combineCrossValidationResults.

/**
 * Combine the results of n cross validation results into a single f-score
 * file.
 * <p>
 * Every file in {@code directory} whose name starts with {@code prefix} and
 * ends with {@code suffix} is read as one fold. The digits immediately
 * following the prefix in the file name are the fold index. Each file is
 * parsed as CSV: the first line must contain a "true+" cell, and on every
 * following line the cells starting at that position are read as true
 * positives, false positives, false negatives, count, precision, recall and
 * f-score for the outcome named in the first cell. Reading a file stops at
 * the row whose first cell is "AVERAGE".
 * <p>
 * For each outcome (in sorted order) one row is written to
 * {@code csvFileWriter} with the summed counts, the scores recalculated from
 * those sums, and the mean and standard deviation of the per-fold precision,
 * recall and f-score. The writer is flushed after each row and is not closed.
 *
 * @param directory
 *            directory containing the per-fold result files
 * @param prefix
 *            file name prefix, directly followed by the fold index
 * @param suffix
 *            file name suffix
 * @param csvFileWriter
 *            writer receiving the combined CSV
 * @throws JolicielException
 *             if the directory cannot be listed, if no file matches, or if
 *             a matching file has no "true+" cell on its first line
 * @throws NumberFormatException
 *             if a matching file name has no numeric index after the prefix,
 *             or a numeric cell cannot be parsed
 * @throws RuntimeException
 *             wrapping any {@link IOException} raised while reading or
 *             writing
 */
static void combineCrossValidationResults(File directory, String prefix, String suffix, Writer csvFileWriter) {
    try {
        File[] files = directory.listFiles();
        if (files == null) {
            // listFiles() returns null (rather than an empty array) when the
            // path is not a directory or cannot be read.
            throw new JolicielException("Unable to list files in directory: " + directory);
        }

        Map<Integer, Map<String, FScoreStats>> fileStatsMap = new HashMap<Integer, Map<String, FScoreStats>>();
        for (File file : files) {
            String fileName = file.getName();
            if (fileName.startsWith(prefix) && fileName.endsWith(suffix)) {
                // Read the whole run of digits after the prefix, so that fold
                // 10 and above do not collide with folds 1, 2, ... in the map.
                int indexEnd = prefix.length();
                while (indexEnd < fileName.length() && Character.isDigit(fileName.charAt(indexEnd))) {
                    indexEnd++;
                }
                if (indexEnd == prefix.length()) {
                    throw new NumberFormatException("No numeric index after prefix in file name: " + fileName);
                }
                int index = Integer.parseInt(fileName.substring(prefix.length(), indexEnd));

                Map<String, FScoreStats> statsMap = new HashMap<String, FScoreCalculator.FScoreStats>();
                fileStatsMap.put(index, statsMap);
                try (Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")))) {
                    boolean firstLine = true;
                    int truePositivePos = -1;
                    while (scanner.hasNextLine()) {
                        String line = scanner.nextLine();
                        List<String> cells = CSV.getCSVCells(line);
                        if (firstLine) {
                            // Header line: locate the "true+" column; the other
                            // statistics are read relative to it.
                            int i = 0;
                            for (String cell : cells) {
                                if (cell.equals("true+")) {
                                    truePositivePos = i;
                                    break;
                                }
                                i++;
                            }
                            if (truePositivePos < 0) {
                                // The scanner is closed by try-with-resources.
                                throw new JolicielException("Couldn't find true+ on first line");
                            }
                            firstLine = false;
                        } else {
                            String outcome = cells.get(0);
                            if (outcome.equals("AVERAGE")) {
                                // Nothing after the AVERAGE row is read.
                                break;
                            }
                            FScoreStats stats = new FScoreStats();
                            stats.outcome = outcome;
                            stats.truePos = Integer.parseInt(cells.get(truePositivePos));
                            stats.falsePos = Integer.parseInt(cells.get(truePositivePos + 1));
                            stats.falseNeg = Integer.parseInt(cells.get(truePositivePos + 2));
                            stats.count = Integer.parseInt(cells.get(truePositivePos + 3));
                            stats.precision = Double.parseDouble(cells.get(truePositivePos + 4));
                            stats.recall = Double.parseDouble(cells.get(truePositivePos + 5));
                            stats.fScore = Double.parseDouble(cells.get(truePositivePos + 6));
                            statsMap.put(outcome, stats);
                        } // firstLine?
                    } // has more lines
                } // close scanner
            } // file in current series
        } // next file

        int numFiles = fileStatsMap.size();
        if (numFiles == 0) {
            throw new JolicielException("No files found matching prefix and suffix provided");
        }

        // Per-outcome distributions over the folds, keyed by outcome + measure name.
        Map<String, DescriptiveStatistics> descriptiveStatsMap = new HashMap<String, DescriptiveStatistics>();
        // Per-outcome counts summed over all folds.
        Map<String, FScoreStats> outcomeStats = new HashMap<String, FScoreCalculator.FScoreStats>();
        // Sorted, so that the output rows come in a stable order.
        Set<String> outcomes = new TreeSet<String>();
        for (Map<String, FScoreStats> statsMap : fileStatsMap.values()) {
            for (FScoreStats stats : statsMap.values()) {
                DescriptiveStatistics fScoreStats = descriptiveStatsMap.get(stats.outcome + "fScore");
                if (fScoreStats == null) {
                    fScoreStats = new DescriptiveStatistics();
                    descriptiveStatsMap.put(stats.outcome + "fScore", fScoreStats);
                }
                fScoreStats.addValue(stats.fScore);

                DescriptiveStatistics precisionStats = descriptiveStatsMap.get(stats.outcome + "precision");
                if (precisionStats == null) {
                    precisionStats = new DescriptiveStatistics();
                    descriptiveStatsMap.put(stats.outcome + "precision", precisionStats);
                }
                precisionStats.addValue(stats.precision);

                DescriptiveStatistics recallStats = descriptiveStatsMap.get(stats.outcome + "recall");
                if (recallStats == null) {
                    recallStats = new DescriptiveStatistics();
                    descriptiveStatsMap.put(stats.outcome + "recall", recallStats);
                }
                recallStats.addValue(stats.recall);

                FScoreStats outcomeStat = outcomeStats.get(stats.outcome);
                if (outcomeStat == null) {
                    outcomeStat = new FScoreStats();
                    outcomeStat.outcome = stats.outcome;
                    outcomeStats.put(stats.outcome, outcomeStat);
                }
                outcomeStat.truePos += stats.truePos;
                outcomeStat.falsePos += stats.falsePos;
                outcomeStat.falseNeg += stats.falseNeg;
                outcomeStat.count += stats.count;

                outcomes.add(stats.outcome);
            }
        }

        csvFileWriter.write(CSV.format(prefix + suffix));
        csvFileWriter.write("\n");
        csvFileWriter.write(CSV.format("outcome"));
        csvFileWriter.write(CSV.format("true+") + CSV.format("false+") + CSV.format("false-") + CSV.format("count") + CSV.format("tot precision") + CSV.format("avg precision") + CSV.format("dev precision") + CSV.format("tot recall") + CSV.format("avg recall") + CSV.format("dev recall") + CSV.format("tot f-score") + CSV.format("avg f-score") + CSV.format("dev f-score") + "\n");
        for (String outcome : outcomes) {
            csvFileWriter.write(CSV.format(outcome));
            FScoreStats outcomeStat = outcomeStats.get(outcome);
            DescriptiveStatistics fScoreStats = descriptiveStatsMap.get(outcome + "fScore");
            DescriptiveStatistics precisionStats = descriptiveStatsMap.get(outcome + "precision");
            DescriptiveStatistics recallStats = descriptiveStatsMap.get(outcome + "recall");
            // Recalculate the scores from the summed counts before writing
            // the "tot" columns.
            outcomeStat.calculate();
            csvFileWriter.write(CSV.format(outcomeStat.truePos));
            csvFileWriter.write(CSV.format(outcomeStat.falsePos));
            csvFileWriter.write(CSV.format(outcomeStat.falseNeg));
            csvFileWriter.write(CSV.format(outcomeStat.count));
            csvFileWriter.write(CSV.format(outcomeStat.precision * 100));
            csvFileWriter.write(CSV.format(precisionStats.getMean()));
            csvFileWriter.write(CSV.format(precisionStats.getStandardDeviation()));
            csvFileWriter.write(CSV.format(outcomeStat.recall * 100));
            csvFileWriter.write(CSV.format(recallStats.getMean()));
            csvFileWriter.write(CSV.format(recallStats.getStandardDeviation()));
            csvFileWriter.write(CSV.format(outcomeStat.fScore * 100));
            csvFileWriter.write(CSV.format(fScoreStats.getMean()));
            csvFileWriter.write(CSV.format(fScoreStats.getStandardDeviation()));
            csvFileWriter.write("\n");
            csvFileWriter.flush();
        }
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
Also used : Scanner(java.util.Scanner) DescriptiveStatistics(org.apache.commons.math3.stat.descriptive.DescriptiveStatistics) JolicielException(com.joliciel.talismane.utils.JolicielException) InputStreamReader(java.io.InputStreamReader) HashMap(java.util.HashMap) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) TreeSet(java.util.TreeSet) BufferedReader(java.io.BufferedReader) File(java.io.File) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

JolicielException (com.joliciel.talismane.utils.JolicielException)8 ArrayList (java.util.ArrayList)3 IOException (java.io.IOException)2 List (java.util.List)2 Scanner (java.util.Scanner)2 TalismaneException (com.joliciel.talismane.TalismaneException)1 ClassificationModel (com.joliciel.talismane.machineLearning.ClassificationModel)1 MachineLearningModel (com.joliciel.talismane.machineLearning.MachineLearningModel)1 LinearSVMModel (com.joliciel.talismane.machineLearning.linearsvm.LinearSVMModel)1 LinearSVMModelTrainer (com.joliciel.talismane.machineLearning.linearsvm.LinearSVMModelTrainer)1 LinearSVMOneVsRestModel (com.joliciel.talismane.machineLearning.linearsvm.LinearSVMOneVsRestModel)1 MaxentModelTrainer (com.joliciel.talismane.machineLearning.maxent.MaxentModelTrainer)1 MaximumEntropyModel (com.joliciel.talismane.machineLearning.maxent.MaximumEntropyModel)1 PerceptronClassificationModel (com.joliciel.talismane.machineLearning.perceptron.PerceptronClassificationModel)1 PerceptronClassificationModelTrainer (com.joliciel.talismane.machineLearning.perceptron.PerceptronClassificationModelTrainer)1 WeightedOutcome (com.joliciel.talismane.utils.WeightedOutcome)1 Feature (de.bwaldvogel.liblinear.Feature)1 Model (de.bwaldvogel.liblinear.Model)1 Parameter (de.bwaldvogel.liblinear.Parameter)1 Problem (de.bwaldvogel.liblinear.Problem)1