Search in sources:

Example 1 with Example

use of edu.cmu.minorthird.classify.Example in project lucida by claritylab.

the class HierarchicalClassifierLearner method addExample.

/**
 * Feeds the given example to one sub-learner per entry of
 * <code>prototypes</code>.
 * <p>
 * For each level index, the example's best class name is split via
 * <code>getLabelPrefix</code> and <code>getSublabel</code>; the example is
 * re-labeled with the sublabel and handed to the learner that
 * <code>classifierLearners</code> returns for the prefix.
 *
 * @param example the labeled example to distribute to the sub-learners
 */
public void addExample(Example example) {
    final int levels = prototypes.length;
    for (int level = 0; level < levels; level++) {
        String fullName = example.getLabel().bestClassName();
        String learnerKey = getLabelPrefix(fullName, level);
        String levelName = getSublabel(fullName, level);
        // same instance, but labeled with the level-specific sublabel
        ClassLabel levelLabel = new ClassLabel(levelName);
        Example levelExample = new Example(example.asInstance(), levelLabel);
        // NOTE(review): no check that a learner exists for this prefix; a
        // missing entry presumably yields null and fails on the next line
        ClassifierLearner target = classifierLearners.get(learnerKey);
        target.addExample(levelExample);
    }
}
Also used : ClassifierLearner(edu.cmu.minorthird.classify.ClassifierLearner) ClassLabel(edu.cmu.minorthird.classify.ClassLabel) Example(edu.cmu.minorthird.classify.Example)

Example 2 with Example

use of edu.cmu.minorthird.classify.Example in project lucida by claritylab.

the class FeatureExtractor method loadFile.

/**
 * Reads the dataset file at the given location and turns its content into
 * edu.cmu.minorthird.classify.Example objects.
 * <p>
 * Every match of {@link #datasetExamplePattern} in the file content is passed
 * to {@link #createExample(String) createExample}. If that conversion throws,
 * the error is logged and the match is skipped. <code>numLoaded</code> is
 * incremented once per example added.
 *
 * @param fileName the name of the dataset file
 * @return the examples created from the file, in the order they were matched
 */
public Example[] loadFile(String fileName) {
    List<Example> loaded = new ArrayList<Example>();
    String content = FileUtil.readFile(fileName, "UTF-8");
    for (Matcher match = datasetExamplePattern.matcher(content); match.find();) {
        try {
            // one matched chunk of text may expand to several examples
            Example[] created = createExample(match.group());
            for (int k = 0; k < created.length; k++) {
                loaded.add(created[k]);
                numLoaded++;
            }
        } catch (Exception e) {
            // best effort: keep loading the remaining matches
            log.error("Error reading Example from file: ", e);
        }
    }
    return loaded.toArray(new Example[loaded.size()]);
}
Also used : Matcher(java.util.regex.Matcher) Example(edu.cmu.minorthird.classify.Example) ArrayList(java.util.ArrayList)

Example 3 with Example

use of edu.cmu.minorthird.classify.Example in project lucida by claritylab.

the class ScoreNormalizationFilter method createExample.

/**
 * Builds a training/evaluation example for a judged answer candidate.
 * <p>
 * The instance comes from <code>createInstance</code>; the class label is
 * the positive class name if the candidate is judged correct and the
 * negative class name otherwise.
 *
 * @param features selected features
 * @param result judged answer candidate
 * @param results all answers to the question
 * @param qid question ID
 * @return training/evaluation example
 */
private static Example createExample(String[] features, Result result, Result[] results, String qid) {
    // instance restricted to the selected features
    Instance instance = createInstance(features, result, results, qid);
    // class name derived from the correctness judgment
    String className;
    if (result.isCorrect()) {
        className = ExampleSchema.POS_CLASS_NAME;
    } else {
        className = ExampleSchema.NEG_CLASS_NAME;
    }
    return new Example(instance, new ClassLabel(className));
}
Also used : ClassLabel(edu.cmu.minorthird.classify.ClassLabel) MutableInstance(edu.cmu.minorthird.classify.MutableInstance) Instance(edu.cmu.minorthird.classify.Instance) Example(edu.cmu.minorthird.classify.Example)

Example 4 with Example

use of edu.cmu.minorthird.classify.Example in project lucida by claritylab.

the class ScoreNormalizationFilter method createDataset.

/**
 * Creates a training/evaluation set from serialized judged
 * <code>Result</code> objects.
 * <p>
 * Only files whose name ends with <code>.serialized</code> are read (one
 * file per question). The question ID is the file name without that
 * trailing extension. Results are skipped unless they have a positive,
 * finite score and exactly one extraction technique.
 *
 * @param features selected features
 * @param serializedDir directory containing serialized results
 * @return training/evaluation set
 */
private static Dataset createDataset(String[] features, String serializedDir) {
    final String suffix = ".serialized";
    Dataset set = new BasicDataset();
    File[] files = FileUtils.getFilesRec(serializedDir);
    for (File file : files) {
        // one file per question
        String filename = file.getName();
        if (!filename.endsWith(suffix)) {
            continue;
        }
        // get question ID and results; strip only the trailing extension so
        // that an ID which itself contains ".serialized" is left intact
        String qid = filename.substring(0, filename.length() - suffix.length());
        Result[] results = readSerializedResults(file);
        // create examples and add to data set
        for (Result result : results) {
            // only factoid answers with 1 extraction technique
            if (result.getScore() <= 0 || result.getScore() == Float.POSITIVE_INFINITY || result.getExtractionTechniques() == null || result.getExtractionTechniques().length != 1) {
                continue;
            }
            Example example = createExample(features, result, results, qid);
            set.add(example);
        }
    }
    return set;
}
Also used : BasicDataset(edu.cmu.minorthird.classify.BasicDataset) CrossValidatedDataset(edu.cmu.minorthird.classify.experiments.CrossValidatedDataset) Dataset(edu.cmu.minorthird.classify.Dataset) Example(edu.cmu.minorthird.classify.Example) File(java.io.File) Result(info.ephyra.search.Result)

Example 5 with Example

use of edu.cmu.minorthird.classify.Example in project lucida by claritylab.

the class HierarchicalClassifierTrainer method makeDataset.

/**
 * Loads the examples from the given file and builds a dataset that keeps
 * only examples whose label is in <code>classLabels</code> and only
 * features whose first part is in <code>featureTypes</code>.
 * <p>
 * If no training labels have been recorded yet, the file is treated as the
 * training set: every kept example's label is recorded in
 * <code>trainingLabels</code>. Otherwise, examples whose label was not
 * recorded during training are discarded. The training flag is cleared
 * before returning.
 *
 * @param fileName the name of the dataset file to load
 * @return the filtered dataset
 */
private Dataset makeDataset(String fileName) {
    // nothing recorded so far: this call loads the training data
    if (trainingLabels == null) {
        loadTraining = true;
        trainingLabels = new HashSet<String>();
    }
    Dataset dataset = new BasicDataset();
    extractor.setUseClassLevels(useClassLevels);
    extractor.setClassLevels(learnerNames.length);
    Example[] loaded = extractor.loadFile(fileName);
    for (Example source : loaded) {
        String className = source.getLabel().bestClassName();
        if (!classLabels.contains(className)) {
            MLToolkit.println("Discarding example for Class: " + className);
            continue;
        }
        // copy the example, keeping only features of the selected types
        MutableInstance filtered = new MutableInstance(source.getSource(), source.getSubpopulationId());
        for (Feature.Looper binary = source.binaryFeatureIterator(); binary.hasNext();) {
            Feature feature = binary.nextFeature();
            if (featureTypes.contains(feature.getPart(0))) {
                filtered.addBinary(feature);
            }
        }
        for (Feature.Looper numeric = source.numericFeatureIterator(); numeric.hasNext();) {
            Feature feature = numeric.nextFeature();
            if (featureTypes.contains(feature.getPart(0))) {
                filtered.addNumeric(feature, source.getWeight(feature));
            }
        }
        Example example = new Example(filtered, source.getLabel());
        MLToolkit.println(example);
        if (loadTraining) {
            trainingLabels.add(className);
            dataset.add(example);
        } else if (trainingLabels.contains(className)) {
            dataset.add(example);
        } else {
            MLToolkit.println("Label of test example not found in training set (discarding): " + className);
        }
    }
    if (loadTraining) {
        loadTraining = false;
    }
    MLToolkit.println("Loaded " + dataset.size() + " examples for experiment from " + fileName);
    return dataset;
}
Also used : BasicDataset(edu.cmu.minorthird.classify.BasicDataset) CrossValidatedDataset(edu.cmu.minorthird.classify.experiments.CrossValidatedDataset) Dataset(edu.cmu.minorthird.classify.Dataset) Example(edu.cmu.minorthird.classify.Example) MutableInstance(edu.cmu.minorthird.classify.MutableInstance) Feature(edu.cmu.minorthird.classify.Feature)

Aggregations

Example (edu.cmu.minorthird.classify.Example)7 ClassLabel (edu.cmu.minorthird.classify.ClassLabel)3 BasicDataset (edu.cmu.minorthird.classify.BasicDataset)2 Dataset (edu.cmu.minorthird.classify.Dataset)2 Feature (edu.cmu.minorthird.classify.Feature)2 Instance (edu.cmu.minorthird.classify.Instance)2 MutableInstance (edu.cmu.minorthird.classify.MutableInstance)2 CrossValidatedDataset (edu.cmu.minorthird.classify.experiments.CrossValidatedDataset)2 Matcher (java.util.regex.Matcher)2 ClassifierLearner (edu.cmu.minorthird.classify.ClassifierLearner)1 Result (info.ephyra.search.Result)1 File (java.io.File)1 ArrayList (java.util.ArrayList)1 Iterator (java.util.Iterator)1