Search in sources :

Example 1 with BasicDataset

use of edu.cmu.minorthird.classify.BasicDataset in project lucida by claritylab.

From the class HierarchicalClassifierTrainer, the method makeDataset:

/**
 * Builds a data set from the examples that the extractor loads from a file.
 * <p>
 * An example is kept only if its best class name is contained in
 * <code>classLabels</code>; of its binary and numeric features, only those
 * whose first part is contained in <code>featureTypes</code> are copied.
 * <p>
 * While <code>loadTraining</code> is set (it is switched on here when
 * <code>trainingLabels</code> has not been initialised yet), the label of every
 * kept example is recorded in <code>trainingLabels</code>. Otherwise examples
 * whose label is not in <code>trainingLabels</code> are discarded. The flag is
 * cleared again before returning.
 *
 * @param fileName name of the file handed to the extractor
 * @return data set holding the filtered examples
 */
private Dataset makeDataset(String fileName) {
    if (trainingLabels == null) {
        // no labels recorded so far: treat this call as the training load
        loadTraining = true;
        trainingLabels = new HashSet<String>();
    }
    Dataset dataset = new BasicDataset();
    extractor.setUseClassLevels(useClassLevels);
    extractor.setClassLevels(learnerNames.length);
    Example[] loaded = extractor.loadFile(fileName);
    for (Example raw : loaded) {
        String className = raw.getLabel().bestClassName();
        if (!classLabels.contains(className)) {
            MLToolkit.println("Discarding example for Class: " + className);
            continue;
        }

        // copy only the features of the selected types into a fresh instance
        MutableInstance filtered = new MutableInstance(raw.getSource(), raw.getSubpopulationId());
        for (Feature.Looper binary = raw.binaryFeatureIterator(); binary.hasNext(); ) {
            Feature feature = binary.nextFeature();
            if (featureTypes.contains(feature.getPart(0))) {
                filtered.addBinary(feature);
            }
        }
        for (Feature.Looper numeric = raw.numericFeatureIterator(); numeric.hasNext(); ) {
            Feature feature = numeric.nextFeature();
            if (featureTypes.contains(feature.getPart(0))) {
                filtered.addNumeric(feature, raw.getWeight(feature));
            }
        }

        Example rebuilt = new Example(filtered, raw.getLabel());
        MLToolkit.println(rebuilt);

        if (loadTraining) {
            trainingLabels.add(className);
            dataset.add(rebuilt);
        } else if (trainingLabels.contains(className)) {
            dataset.add(rebuilt);
        } else {
            MLToolkit.println("Label of test example not found in training set (discarding): " + className);
        }
    }
    if (loadTraining) {
        loadTraining = false;
    }
    MLToolkit.println("Loaded " + dataset.size() + " examples for experiment from " + fileName);
    return dataset;
}
Also used : BasicDataset(edu.cmu.minorthird.classify.BasicDataset) CrossValidatedDataset(edu.cmu.minorthird.classify.experiments.CrossValidatedDataset) Dataset(edu.cmu.minorthird.classify.Dataset) Example(edu.cmu.minorthird.classify.Example) MutableInstance(edu.cmu.minorthird.classify.MutableInstance) BasicDataset(edu.cmu.minorthird.classify.BasicDataset) Feature(edu.cmu.minorthird.classify.Feature)

Example 2 with BasicDataset

use of edu.cmu.minorthird.classify.BasicDataset in project lucida by claritylab.

From the class ScoreNormalizationFilter, the method createDataset:

/**
 * Builds a training/evaluation set out of serialized judged
 * <code>Result</code> objects.
 * <p>
 * Every file below <code>serializedDir</code> whose name ends in
 * <code>.serialized</code> is read (one file per question); the file name
 * without that extension serves as the question ID. A result contributes an
 * example only if its score is not <code>&lt;= 0</code>, is not positive
 * infinity, and exactly one extraction technique is recorded for it.
 *
 * @param features selected features
 * @param serializedDir directory containing serialized results
 * @return training/evaluation set
 */
private static Dataset createDataset(String[] features, String serializedDir) {
    Dataset dataset = new BasicDataset();
    for (File serialized : FileUtils.getFilesRec(serializedDir)) {
        String name = serialized.getName();
        if (name.endsWith(".serialized")) {
            // derive the question ID from the file name, then load its results
            String questionId = name.replace(".serialized", "");
            Result[] candidates = readSerializedResults(serialized);
            for (Result candidate : candidates) {
                // only factoid answers with 1 extraction technique
                boolean usable = !(candidate.getScore() <= 0)
                        && candidate.getScore() != Float.POSITIVE_INFINITY
                        && candidate.getExtractionTechniques() != null
                        && candidate.getExtractionTechniques().length == 1;
                if (usable) {
                    dataset.add(createExample(features, candidate, candidates, questionId));
                }
            }
        }
    }
    return dataset;
}
Also used : BasicDataset(edu.cmu.minorthird.classify.BasicDataset) CrossValidatedDataset(edu.cmu.minorthird.classify.experiments.CrossValidatedDataset) Dataset(edu.cmu.minorthird.classify.Dataset) Example(edu.cmu.minorthird.classify.Example) BasicDataset(edu.cmu.minorthird.classify.BasicDataset) File(java.io.File) Result(info.ephyra.search.Result)

Aggregations

BasicDataset (edu.cmu.minorthird.classify.BasicDataset): 2, Dataset (edu.cmu.minorthird.classify.Dataset): 2, Example (edu.cmu.minorthird.classify.Example): 2, CrossValidatedDataset (edu.cmu.minorthird.classify.experiments.CrossValidatedDataset): 2, Feature (edu.cmu.minorthird.classify.Feature): 1, MutableInstance (edu.cmu.minorthird.classify.MutableInstance): 1, Result (info.ephyra.search.Result): 1, File (java.io.File): 1