Use of com.joliciel.talismane.utils.JolicielException in project talismane by joliciel-informatique.
In class ModelTrainerFactory, method constructTrainer:
/**
 * Builds the classification model trainer for the algorithm named in the
 * supplied configuration.<br>
 * <br>
 * The configuration is expected to be a local configuration whose root is
 * equivalent to the talismane.machine-learning key in reference.conf. It is
 * validated against the {@code talismane.machine-learning.generic} section of
 * the default reference configuration before anything else is read.
 *
 * @param config
 *            the local machine-learning configuration; its {@code algorithm}
 *            key must name a {@link MachineLearningAlgorithm} constant
 * @return a freshly constructed trainer which has already received
 *         {@code config} through {@code setParameters}
 * @throws JolicielException
 *             if the algorithm has no trainer associated with it here
 */
public ClassificationModelTrainer constructTrainer(Config config) {
    Config genericReference = ConfigFactory.defaultReference().getConfig("talismane.machine-learning.generic");
    config.checkValid(genericReference);

    String algorithmName = config.getString("algorithm");
    MachineLearningAlgorithm algorithm = MachineLearningAlgorithm.valueOf(algorithmName);

    final ClassificationModelTrainer trainer;
    switch (algorithm) {
    case MaxEnt:
        trainer = new MaxentModelTrainer();
        break;
    case LinearSVM:
    case LinearSVMOneVsRest:
        // both SVM variants share the same trainer implementation
        trainer = new LinearSVMModelTrainer();
        break;
    case Perceptron:
        trainer = new PerceptronClassificationModelTrainer();
        break;
    default:
        throw new JolicielException("Machine learning algorithm not yet supported: " + algorithm);
    }

    trainer.setParameters(config);
    return trainer;
}
Use of com.joliciel.talismane.utils.JolicielException in project talismane by joliciel-informatique.
In class ExternalResourceFinder, method addExternalResource:
/**
 * Add external resources located in a scanner from a particular filename.
 * <p>
 * The first line read from the scanner must be of the form
 * {@code Type: <type>}, where {@code <type>} is either {@code KeyValue} or
 * {@code KeyMultiValue}. The scanner, positioned just after that line, is
 * then handed to the matching resource class, and the resulting resource is
 * registered through the single-argument {@code addExternalResource}
 * overload.
 *
 * @param fileName
 *            name used to identify the resource and to report errors
 * @param scanner
 *            scanner positioned at the very start of the resource content
 * @throws JolicielException
 *             if the content is empty, does not start with a
 *             {@code Type: } line, or declares an unknown type
 */
public void addExternalResource(String fileName, Scanner scanner) {
    LOG.debug("Reading " + fileName);

    final String typePrefix = "Type: ";

    // An empty input would make nextLine() fail with a bare
    // NoSuchElementException that does not mention the file; treat it as a
    // missing type header instead.
    String typeLine = scanner.hasNextLine() ? scanner.nextLine() : null;
    if (typeLine == null || !typeLine.startsWith(typePrefix)) {
        throw new JolicielException("In file " + fileName + ", expected line starting with \"Type: \"");
    }

    String type = typeLine.substring(typePrefix.length());
    if ("KeyValue".equals(type)) {
        TextFileResource textFileResource = new TextFileResource(fileName, scanner);
        this.addExternalResource(textFileResource);
    } else if ("KeyMultiValue".equals(type)) {
        TextFileMultivaluedResource resource = new TextFileMultivaluedResource(fileName, scanner);
        this.addExternalResource(resource);
    } else {
        throw new JolicielException("Unexpected type in file: " + fileName + ": " + type);
    }
}
Use of com.joliciel.talismane.utils.JolicielException in project talismane by joliciel-informatique.
In class FScoreCalculator, method combineCrossValidationResults:
/**
 * Combine the results of n cross validation results into a single f-score
 * file.
 * <p>
 * Every file in {@code directory} whose name starts with {@code prefix} and
 * ends with {@code suffix} is read as one fold. The fold index is the run of
 * digits that immediately follows the prefix in the file name. Each file is
 * parsed as CSV: the first line must contain a {@code true+} cell, and on
 * every following line the cells from that position on are read, in order,
 * as true positives, false positives, false negatives, count, precision,
 * recall and f-score. Reading of a file stops at the row whose first cell is
 * {@code AVERAGE}.
 * <p>
 * For each outcome, the counts are summed across folds and the per-fold
 * precision, recall and f-score are collected so that their mean and
 * standard deviation can be written alongside the totals.
 *
 * @param directory
 *            directory containing the per-fold f-score files
 * @param prefix
 *            file name prefix, immediately followed by the fold index
 * @param suffix
 *            file name suffix
 * @param csvFileWriter
 *            writer receiving the combined CSV; flushed after each outcome
 *            row, never closed here
 * @throws JolicielException
 *             if the directory cannot be listed, if no file matches, or if
 *             a file has no {@code true+} column on its first line
 * @throws NumberFormatException
 *             if a matching file name has no digit right after the prefix,
 *             or a numeric cell cannot be parsed
 * @throws RuntimeException
 *             wrapping any {@link IOException} raised while reading or
 *             writing
 */
static void combineCrossValidationResults(File directory, String prefix, String suffix, Writer csvFileWriter) {
    try {
        File[] files = directory.listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or
            // cannot be read; fail with a clear message rather than an NPE.
            throw new JolicielException("Could not list files in directory: " + directory);
        }

        // fold index -> (outcome -> stats read from that fold's file)
        Map<Integer, Map<String, FScoreStats>> fileStatsMap = new HashMap<Integer, Map<String, FScoreStats>>();
        for (File file : files) {
            String fileName = file.getName();
            if (fileName.startsWith(prefix) && fileName.endsWith(suffix)) {
                int index = parseFoldIndex(fileName, prefix, suffix);
                fileStatsMap.put(index, readFoldStats(file));
            }
        }

        if (fileStatsMap.isEmpty()) {
            throw new JolicielException("No files found matching prefix and suffix provided");
        }

        // keyed by outcome + "fScore" / "precision" / "recall"
        Map<String, DescriptiveStatistics> descriptiveStatsMap = new HashMap<String, DescriptiveStatistics>();
        // per-outcome counts summed over all folds
        Map<String, FScoreStats> outcomeStats = new HashMap<String, FScoreStats>();
        // sorted so that the output rows have a stable order
        Set<String> outcomes = new TreeSet<String>();

        for (Map<String, FScoreStats> statsMap : fileStatsMap.values()) {
            for (FScoreStats stats : statsMap.values()) {
                statsFor(descriptiveStatsMap, stats.outcome + "fScore").addValue(stats.fScore);
                statsFor(descriptiveStatsMap, stats.outcome + "precision").addValue(stats.precision);
                statsFor(descriptiveStatsMap, stats.outcome + "recall").addValue(stats.recall);

                FScoreStats outcomeStat = outcomeStats.get(stats.outcome);
                if (outcomeStat == null) {
                    outcomeStat = new FScoreStats();
                    outcomeStat.outcome = stats.outcome;
                    outcomeStats.put(stats.outcome, outcomeStat);
                }
                outcomeStat.truePos += stats.truePos;
                outcomeStat.falsePos += stats.falsePos;
                outcomeStat.falseNeg += stats.falseNeg;
                outcomeStat.count += stats.count;
                outcomes.add(stats.outcome);
            }
        }

        csvFileWriter.write(CSV.format(prefix + suffix));
        csvFileWriter.write("\n");
        csvFileWriter.write(CSV.format("outcome"));
        csvFileWriter.write(CSV.format("true+") + CSV.format("false+") + CSV.format("false-") + CSV.format("count")
                + CSV.format("tot precision") + CSV.format("avg precision") + CSV.format("dev precision")
                + CSV.format("tot recall") + CSV.format("avg recall") + CSV.format("dev recall")
                + CSV.format("tot f-score") + CSV.format("avg f-score") + CSV.format("dev f-score") + "\n");

        for (String outcome : outcomes) {
            csvFileWriter.write(CSV.format(outcome));
            FScoreStats outcomeStat = outcomeStats.get(outcome);
            DescriptiveStatistics fScoreStats = descriptiveStatsMap.get(outcome + "fScore");
            DescriptiveStatistics precisionStats = descriptiveStatsMap.get(outcome + "precision");
            DescriptiveStatistics recallStats = descriptiveStatsMap.get(outcome + "recall");

            // presumably derives precision/recall/fScore from the summed
            // counts - confirm against FScoreStats.calculate()
            outcomeStat.calculate();

            csvFileWriter.write(CSV.format(outcomeStat.truePos));
            csvFileWriter.write(CSV.format(outcomeStat.falsePos));
            csvFileWriter.write(CSV.format(outcomeStat.falseNeg));
            csvFileWriter.write(CSV.format(outcomeStat.count));
            csvFileWriter.write(CSV.format(outcomeStat.precision * 100));
            csvFileWriter.write(CSV.format(precisionStats.getMean()));
            csvFileWriter.write(CSV.format(precisionStats.getStandardDeviation()));
            csvFileWriter.write(CSV.format(outcomeStat.recall * 100));
            csvFileWriter.write(CSV.format(recallStats.getMean()));
            csvFileWriter.write(CSV.format(recallStats.getStandardDeviation()));
            csvFileWriter.write(CSV.format(outcomeStat.fScore * 100));
            csvFileWriter.write(CSV.format(fScoreStats.getMean()));
            csvFileWriter.write(CSV.format(fScoreStats.getStandardDeviation()));
            csvFileWriter.write("\n");
            csvFileWriter.flush();
        }
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}

/**
 * Extracts the fold index that follows {@code prefix} in {@code fileName}.
 * The whole leading run of digits is used, so that folds 10 and above are
 * not confused with folds 1-9. Digits beyond the first are only consumed
 * while they lie before the suffix.
 */
private static int parseFoldIndex(String fileName, String prefix, String suffix) {
    int start = prefix.length();
    int suffixStart = fileName.length() - suffix.length();
    int end = start;
    // The first character is always considered (even if prefix and suffix
    // overlap); later digits must not belong to the suffix.
    while (end < fileName.length() && (end == start || end < suffixStart) && Character.isDigit(fileName.charAt(end))) {
        end++;
    }
    // An empty digit run makes parseInt throw NumberFormatException.
    return Integer.parseInt(fileName.substring(start, end));
}

/**
 * Reads one fold's f-score CSV file into a map from outcome to its stats,
 * stopping at the AVERAGE row.
 */
private static Map<String, FScoreStats> readFoldStats(File file) throws IOException {
    Map<String, FScoreStats> statsMap = new HashMap<String, FScoreStats>();
    try (Scanner scanner = new Scanner(new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")))) {
        boolean firstLine = true;
        int truePositivePos = -1;
        while (scanner.hasNextLine()) {
            List<String> cells = CSV.getCSVCells(scanner.nextLine());
            if (firstLine) {
                // the header row tells us where the numeric columns start
                truePositivePos = cells.indexOf("true+");
                if (truePositivePos < 0) {
                    throw new JolicielException("Couldn't find true+ on first line");
                }
                firstLine = false;
            } else {
                String outcome = cells.get(0);
                if (outcome.equals("AVERAGE")) {
                    break;
                }
                FScoreStats stats = new FScoreStats();
                stats.outcome = outcome;
                stats.truePos = Integer.parseInt(cells.get(truePositivePos));
                stats.falsePos = Integer.parseInt(cells.get(truePositivePos + 1));
                stats.falseNeg = Integer.parseInt(cells.get(truePositivePos + 2));
                stats.count = Integer.parseInt(cells.get(truePositivePos + 3));
                stats.precision = Double.parseDouble(cells.get(truePositivePos + 4));
                stats.recall = Double.parseDouble(cells.get(truePositivePos + 5));
                stats.fScore = Double.parseDouble(cells.get(truePositivePos + 6));
                statsMap.put(outcome, stats);
            }
        }
    }
    return statsMap;
}

/**
 * Returns the statistics accumulator stored under {@code key}, creating and
 * registering a new one if none exists yet.
 */
private static DescriptiveStatistics statsFor(Map<String, DescriptiveStatistics> statsByKey, String key) {
    DescriptiveStatistics statistics = statsByKey.get(key);
    if (statistics == null) {
        statistics = new DescriptiveStatistics();
        statsByKey.put(key, statistics);
    }
    return statistics;
}
Aggregations