Search in sources :

Example 1 with Classifier

use of weka.classifiers.Classifier in project dkpro-tc by dkpro.

the class WekaSerliazeModelConnector method writeWekaSpecificInformation.

/**
 * Trains the configured classifier on the training data and writes the model artifacts into
 * {@code outputFolder}: a header-only (instance-free) copy of the training data, the serialized
 * classifier and, unless the learning mode is regression, the class labels (one per line).
 *
 * @param aContext
 *            task context used to locate the folder holding the training data
 * @throws Exception
 *             if feature selection is configured (not supported when saving a model), or if
 *             reading the data, training, or writing any artifact fails
 */
private void writeWekaSpecificInformation(TaskContext aContext) throws Exception {
    boolean isMultiLabel = learningMode.equals(Constants.LM_MULTI_LABEL);
    boolean isRegression = learningMode.equals(Constants.LM_REGRESSION);
    File arffFileTrain = new File(aContext.getFolder(TEST_TASK_INPUT_KEY_TRAINING_DATA, AccessMode.READONLY).getPath() + "/" + Constants.FILENAME_DATA_IN_CLASSIFIER_FORMAT);
    Instances trainData = WekaUtils.getInstances(arffFileTrain, isMultiLabel);
    trainData = WekaUtils.removeInstanceId(trainData, isMultiLabel);

    // FEATURE SELECTION - the conditions that would trigger it differ per learning mode, but in
    // both cases it is rejected here.
    boolean featureSelectionConfigured = isMultiLabel
            ? attributeEvaluator != null && labelTransformationMethod != null && numLabelsToKeep > 0
            : featureSearcher != null && attributeEvaluator != null;
    if (featureSelectionConfigured) {
        throw new Exception("Feature Selection is currently not supported in Save Model mode.");
    }

    // write training data header: copy the data set and drop all instances so that only the
    // attribute information is serialized
    Instances emptyTrainCopy = new Instances(trainData);
    emptyTrainCopy.delete();
    // try-with-resources closes both streams even if construction or writeObject throws
    try (FileOutputStream fileOut = new FileOutputStream(new File(outputFolder, "training_data"));
            ObjectOutputStream outT = new ObjectOutputStream(fileOut)) {
        outT.writeObject(emptyTrainCopy);
    }

    // write model file
    Classifier cl = WekaUtils.getClassifier(learningMode, classificationArguments);
    cl.buildClassifier(trainData);
    File model = new File(outputFolder, MODEL_CLASSIFIER);
    mkdir(model.getParentFile());
    weka.core.SerializationHelper.write(model.getAbsolutePath(), cl);

    // write class labels file - skipped for regression
    if (!isRegression) {
        List<String> classLabels = WekaUtils.getClassLabels(trainData, isMultiLabel);
        String classLabelsString = StringUtils.join(classLabels, "\n");
        FileUtils.writeStringToFile(new File(outputFolder, MODEL_CLASS_LABELS), classLabelsString, "utf-8");
    }
}
Also used : Instances(weka.core.Instances) FileOutputStream(java.io.FileOutputStream) Classifier(weka.classifiers.Classifier) ObjectOutputStream(java.io.ObjectOutputStream) File(java.io.File) IOException(java.io.IOException)

Example 2 with Classifier

use of weka.classifiers.Classifier in project dkpro-tc by dkpro.

the class WekaUtils method getClassifier.

/**
 * Instantiates the classifier described by the classification arguments.
 * <p>
 * The element at index 1 is read as the classifier class name and every element from index 2
 * onwards as a classifier option; the element at index 0 is not used by this method. In
 * multi-label mode the classifier is created without options and the options are applied
 * afterwards through {@code MultiLabelClassifier#setOptions}.
 *
 * @param learningMode
 *            the learning mode
 * @param classificationArguments
 *            classifier arguments
 * @return classifier
 * @throws Exception
 *             in case of errors
 */
public static Classifier getClassifier(String learningMode, List<Object> classificationArguments) throws Exception {
    if (!learningMode.equals(Constants.LM_MULTI_LABEL)) {
        // single-label / regression: hand the options straight to the factory
        String className = (String) classificationArguments.get(1);
        String[] options = classificationArguments.subList(2, classificationArguments.size()).toArray(new String[0]);
        return AbstractClassifier.forName(className, options);
    }

    // multi-label: create the classifier bare, then configure it via setOptions
    String[] mekaOptions = classificationArguments.subList(2, classificationArguments.size()).toArray(new String[0]);
    Classifier mekaClassifier = AbstractClassifier.forName((String) classificationArguments.get(1), new String[0]);
    ((MultiLabelClassifier) mekaClassifier).setOptions(mekaOptions);
    return mekaClassifier;
}
Also used : MultiLabelClassifier(meka.classifiers.multilabel.MultiLabelClassifier) AbstractClassifier(weka.classifiers.AbstractClassifier) Classifier(weka.classifiers.Classifier) MultiLabelClassifier(meka.classifiers.multilabel.MultiLabelClassifier)

Example 3 with Classifier

use of weka.classifiers.Classifier in project dkpro-tc by dkpro.

the class WekaArffTest method main.

/**
 * Loads the "manyInstances" train and test ARFF files and runs a Weka {@code Evaluation} of a
 * {@code NaiveBayes} classifier on the test data.
 * <p>
 * NOTE(review): the classifier is never built on the training data before it is evaluated -
 * presumably this method reproduces a problem in the evaluation step; confirm before changing.
 *
 * @param args
 *            not used
 * @throws Exception
 *             if loading the data or the evaluation fails
 */
public static void main(String[] args) throws Exception {
    File trainFile = new File("src/main/resources/arff/manyInstances/train.arff.gz");
    Instances trainInstances = WekaUtils.getInstances(trainFile, false);

    File testFile = new File("src/main/resources/arff/manyInstances/test.arff.gz");
    Instances testInstances = WekaUtils.getInstances(testFile, false);

    Classifier naiveBayes = new NaiveBayes();
    // no problems until here
    Evaluation evaluation = new Evaluation(trainInstances);
    evaluation.evaluateModel(naiveBayes, testInstances);
}
Also used : Instances(weka.core.Instances) NaiveBayes(weka.classifiers.bayes.NaiveBayes) Evaluation(weka.classifiers.Evaluation) Classifier(weka.classifiers.Classifier) File(java.io.File)

Example 4 with Classifier

use of weka.classifiers.Classifier in project dkpro-tc by dkpro.

the class WekaTestTask method execute.

/**
 * Trains the configured classifier on the training data, evaluates it on the test data and
 * writes the evaluation result, the optional feature-selection report and the predictions to
 * files obtained from the task context.
 *
 * @param aContext
 *            task context used to resolve input and output files
 * @throws Exception
 *             if reading the data, feature selection, training, evaluation or writing fails
 */
@Override
public void execute(TaskContext aContext) throws Exception {
    final boolean multiLabel = learningMode.equals(Constants.LM_MULTI_LABEL);
    final boolean regression = learningMode.equals(Constants.LM_REGRESSION);

    File trainArff = WekaUtils.getFile(aContext, TEST_TASK_INPUT_KEY_TRAINING_DATA, Constants.FILENAME_DATA_IN_CLASSIFIER_FORMAT, AccessMode.READONLY);
    File testArff = WekaUtils.getFile(aContext, TEST_TASK_INPUT_KEY_TEST_DATA, Constants.FILENAME_DATA_IN_CLASSIFIER_FORMAT, AccessMode.READONLY);
    Instances train = WekaUtils.getInstances(trainArff, multiLabel);
    Instances test = WekaUtils.getInstances(testArff, multiLabel);

    // outcome classes are only aligned for non-regression experiments
    if (!regression) {
        test = WekaUtils.makeOutcomeClassesCompatible(train, test, multiLabel);
    }

    // keep a copy that still carries the instance ids; they are re-attached to the predictions
    Instances testWithIds = new Instances(test);
    train = WekaUtils.removeInstanceId(train, multiLabel);
    test = WekaUtils.removeInstanceId(test, multiLabel);

    // FEATURE SELECTION
    if (multiLabel) {
        if (attributeEvaluator != null && labelTransformationMethod != null && numLabelsToKeep > 0) {
            Remove removeFilter = WekaUtils.featureSelectionMultilabel(aContext, train, attributeEvaluator, labelTransformationMethod, numLabelsToKeep);
            if (applySelection) {
                Logger.getLogger(getClass()).info("APPLYING FEATURE SELECTION");
                train = WekaUtils.applyAttributeSelectionFilter(train, removeFilter);
                test = WekaUtils.applyAttributeSelectionFilter(test, removeFilter);
            }
        }
    } else if (featureSearcher != null && attributeEvaluator != null) {
        AttributeSelection selection = WekaUtils.featureSelectionSinglelabel(aContext, train, featureSearcher, attributeEvaluator);
        // the selection report is written even when the selection itself is not applied
        File selectionReport = WekaUtils.getFile(aContext, "", WekaTestTask.featureSelectionFile, AccessMode.READWRITE);
        FileUtils.writeStringToFile(selectionReport, selection.toResultsString(), "utf-8");
        if (applySelection) {
            Logger.getLogger(getClass()).info("APPLYING FEATURE SELECTION");
            train = selection.reduceDimensionality(train);
            test = selection.reduceDimensionality(test);
        }
    }

    // instantiate the classifier and resolve the file receiving the evaluation result
    Classifier classifier = WekaUtils.getClassifier(learningMode, classificationArguments);
    File evaluationFile = WekaUtils.getFile(aContext, "", evaluationBin, AccessMode.READWRITE);

    // evaluation & prediction generation
    if (multiLabel) {
        // no explicit buildClassifier call here - meka builds the classifier internally
        Result mekaResult = WekaUtils.getEvaluationMultilabel(classifier, train, test, threshold);
        MultilabelResult summary = new MultilabelResult(mekaResult.allTrueValues(), mekaResult.allPredictions(), threshold);
        WekaUtils.writeMlResultToFile(summary, evaluationFile);
        test = WekaUtils.getPredictionInstancesMultiLabel(test, classifier, WekaUtils.getMekaThreshold(threshold, mekaResult, train));
        test = WekaUtils.addInstanceId(test, testWithIds, true);
    } else {
        // single-label setup: the classifier has to be trained explicitly before evaluating
        classifier.buildClassifier(train);
        String evaluationPath = evaluationFile.getAbsolutePath();
        weka.core.SerializationHelper.write(evaluationPath, WekaUtils.getEvaluationSinglelabel(classifier, train, test));
        test = WekaUtils.getPredictionInstancesSingleLabel(test, classifier);
        test = WekaUtils.addInstanceId(test, testWithIds, false);
    }

    // The data sink expects a file ending in .arff, so the predictions are first written to a
    // temporary "prediction.arff" next to the target and then moved to the real prediction file.
    File predictions = WekaUtils.getFile(aContext, "", Constants.FILENAME_PREDICTIONS, AccessMode.READWRITE);
    File temporaryArff = new File(predictions.getParent(), "prediction.arff");
    DataSink.write(temporaryArff.getAbsolutePath(), test);
    FileUtils.moveFile(temporaryArff, predictions);
}
Also used : Instances(weka.core.Instances) AttributeSelection(weka.attributeSelection.AttributeSelection) MultilabelResult(org.dkpro.tc.ml.weka.util.MultilabelResult) Remove(weka.filters.unsupervised.attribute.Remove) Classifier(weka.classifiers.Classifier) File(java.io.File) MultilabelResult(org.dkpro.tc.ml.weka.util.MultilabelResult) Result(meka.core.Result)

Aggregations

Classifier (weka.classifiers.Classifier)4 File (java.io.File)3 Instances (weka.core.Instances)3 FileOutputStream (java.io.FileOutputStream)1 IOException (java.io.IOException)1 ObjectOutputStream (java.io.ObjectOutputStream)1 MultiLabelClassifier (meka.classifiers.multilabel.MultiLabelClassifier)1 Result (meka.core.Result)1 MultilabelResult (org.dkpro.tc.ml.weka.util.MultilabelResult)1 AttributeSelection (weka.attributeSelection.AttributeSelection)1 AbstractClassifier (weka.classifiers.AbstractClassifier)1 Evaluation (weka.classifiers.Evaluation)1 NaiveBayes (weka.classifiers.bayes.NaiveBayes)1 Remove (weka.filters.unsupervised.attribute.Remove)1