use of weka.classifiers.Classifier in project dkpro-tc by dkpro.
the class WekaSerliazeModelConnector method writeWekaSpecificInformation.
private void writeWekaSpecificInformation(TaskContext aContext) throws Exception {
    boolean isMultiLabel = learningMode.equals(Constants.LM_MULTI_LABEL);
    boolean isRegression = learningMode.equals(Constants.LM_REGRESSION);
    File arffFileTrain = new File(aContext.getFolder(TEST_TASK_INPUT_KEY_TRAINING_DATA, AccessMode.READONLY).getPath() + "/" + Constants.FILENAME_DATA_IN_CLASSIFIER_FORMAT);
    Instances trainData = WekaUtils.getInstances(arffFileTrain, isMultiLabel);
    trainData = WekaUtils.removeInstanceId(trainData, isMultiLabel);
    // FEATURE SELECTION
    if (!isMultiLabel) {
        if (featureSearcher != null && attributeEvaluator != null) {
            // Logger.getLogger(getClass()).info("APPLYING FEATURE SELECTION");
            throw new Exception("Feature Selection is currently not supported in Save Model mode.");
        }
    } else {
        if (attributeEvaluator != null && labelTransformationMethod != null && numLabelsToKeep > 0) {
            // Logger.getLogger(getClass()).info("APPLYING FEATURE SELECTION");
            throw new Exception("Feature Selection is currently not supported in Save Model mode.");
        }
    }
    // write training data header
    ObjectOutputStream outT = new ObjectOutputStream(new FileOutputStream(new File(outputFolder, "training_data")));
    Instances emptyTrainCopy = new Instances(trainData);
    emptyTrainCopy.delete();
    outT.writeObject(emptyTrainCopy);
    outT.close();
    // write model file
    Classifier cl = WekaUtils.getClassifier(learningMode, classificationArguments);
    cl.buildClassifier(trainData);
    File model = new File(outputFolder, MODEL_CLASSIFIER);
    mkdir(model.getParentFile());
    weka.core.SerializationHelper.write(model.getAbsolutePath(), cl);
    // write class labels file
    List<String> classLabels;
    if (!isRegression) {
        classLabels = WekaUtils.getClassLabels(trainData, isMultiLabel);
        String classLabelsString = StringUtils.join(classLabels, "\n");
        FileUtils.writeStringToFile(new File(outputFolder, MODEL_CLASS_LABELS), classLabelsString, "utf-8");
    }
}
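For illustration only, a minimal sketch of how the artifacts written above could be restored later; the folder and the classifier file name are assumptions, only ObjectInputStream and weka.core.SerializationHelper.read mirror the write calls used in the method.
// Illustrative sketch, not dkpro-tc code: restore the serialized training header and classifier.
File modelDir = new File("target/model"); // assumed output folder
try (ObjectInputStream in = new ObjectInputStream(new FileInputStream(new File(modelDir, "training_data")))) {
    // the empty Instances copy carries only the attribute header of the training data
    Instances trainHeader = (Instances) in.readObject();
    // the classifier was written with weka.core.SerializationHelper.write(...); file name is an assumption
    Classifier cl = (Classifier) weka.core.SerializationHelper.read(new File(modelDir, "classifier.model").getAbsolutePath());
    System.out.println("Restored " + cl.getClass().getSimpleName() + " with " + trainHeader.numAttributes() + " attributes");
}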
use of weka.classifiers.Classifier in project dkpro-tc by dkpro.
the class WekaUtils method getClassifier.
/**
 * Retrieves a classifier
 *
 * @param learningMode
 *            the learning mode
 * @param classificationArguments
 *            classifier arguments
 * @return classifier
 * @throws Exception
 *             in case of errors
 */
public static Classifier getClassifier(String learningMode, List<Object> classificationArguments) throws Exception {
    boolean multiLabel = learningMode.equals(Constants.LM_MULTI_LABEL);
    Classifier cl;
    if (multiLabel) {
        List<String> mlArgs = Arrays.asList(classificationArguments.subList(2, classificationArguments.size()).toArray(new String[0]));
        cl = AbstractClassifier.forName((String) classificationArguments.get(1), new String[] {});
        ((MultiLabelClassifier) cl).setOptions(mlArgs.toArray(new String[0]));
    } else {
        cl = AbstractClassifier.forName((String) classificationArguments.get(1), classificationArguments.subList(2, classificationArguments.size()).toArray(new String[0]));
    }
    return cl;
}
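Based only on the indexing above (index 1 holds the classifier class name, indices 2 and higher hold its options), a hedged usage example; the first list entry is a placeholder assumption since the method never reads it.
// Hypothetical call: the first element is not interpreted by getClassifier, so it is a dummy here.
List<Object> args = Arrays.asList("dummy", weka.classifiers.functions.SMO.class.getName(), "-C", "1.0");
Classifier smo = WekaUtils.getClassifier(Constants.LM_SINGLE_LABEL, args);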
use of weka.classifiers.Classifier in project dkpro-tc by dkpro.
the class WekaArffTest method main.
/**
 * @param args
 * @throws Exception
 */
public static void main(String[] args) throws Exception {
    File train = new File("src/main/resources/arff/manyInstances/train.arff.gz");
    File test = new File("src/main/resources/arff/manyInstances/test.arff.gz");
    Instances trainData = WekaUtils.getInstances(train, false);
    Instances testData = WekaUtils.getInstances(test, false);
    Classifier cl = new NaiveBayes();
    // no problems until here
    Evaluation eval = new Evaluation(trainData);
    eval.evaluateModel(cl, testData);
}
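Note that cl is never trained in this snippet; Weka's Evaluation.evaluateModel expects an already built classifier, so a working variant would roughly look like the following sketch, assuming trainData and testData are loaded as above.
// Build the NaiveBayes model on the training data before scoring it on the test data.
Classifier cl = new NaiveBayes();
cl.buildClassifier(trainData);
Evaluation eval = new Evaluation(trainData);
eval.evaluateModel(cl, testData);
System.out.println(eval.toSummaryString());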
use of weka.classifiers.Classifier in project dkpro-tc by dkpro.
the class WekaTestTask method execute.
@Override
public void execute(TaskContext aContext) throws Exception {
    boolean multiLabel = learningMode.equals(Constants.LM_MULTI_LABEL);
    File arffFileTrain = WekaUtils.getFile(aContext, TEST_TASK_INPUT_KEY_TRAINING_DATA, Constants.FILENAME_DATA_IN_CLASSIFIER_FORMAT, AccessMode.READONLY);
    File arffFileTest = WekaUtils.getFile(aContext, TEST_TASK_INPUT_KEY_TEST_DATA, Constants.FILENAME_DATA_IN_CLASSIFIER_FORMAT, AccessMode.READONLY);
    Instances trainData = WekaUtils.getInstances(arffFileTrain, multiLabel);
    Instances testData = WekaUtils.getInstances(arffFileTest, multiLabel);
    // do not balance in regression experiments
    if (!learningMode.equals(Constants.LM_REGRESSION)) {
        testData = WekaUtils.makeOutcomeClassesCompatible(trainData, testData, multiLabel);
    }
    Instances copyTestData = new Instances(testData);
    trainData = WekaUtils.removeInstanceId(trainData, multiLabel);
    testData = WekaUtils.removeInstanceId(testData, multiLabel);
    // FEATURE SELECTION
    if (!learningMode.equals(Constants.LM_MULTI_LABEL)) {
        if (featureSearcher != null && attributeEvaluator != null) {
            AttributeSelection attSel = WekaUtils.featureSelectionSinglelabel(aContext, trainData, featureSearcher, attributeEvaluator);
            File file = WekaUtils.getFile(aContext, "", WekaTestTask.featureSelectionFile, AccessMode.READWRITE);
            FileUtils.writeStringToFile(file, attSel.toResultsString(), "utf-8");
            if (applySelection) {
                Logger.getLogger(getClass()).info("APPLYING FEATURE SELECTION");
                trainData = attSel.reduceDimensionality(trainData);
                testData = attSel.reduceDimensionality(testData);
            }
        }
    } else {
        if (attributeEvaluator != null && labelTransformationMethod != null && numLabelsToKeep > 0) {
            Remove attSel = WekaUtils.featureSelectionMultilabel(aContext, trainData, attributeEvaluator, labelTransformationMethod, numLabelsToKeep);
            if (applySelection) {
                Logger.getLogger(getClass()).info("APPLYING FEATURE SELECTION");
                trainData = WekaUtils.applyAttributeSelectionFilter(trainData, attSel);
                testData = WekaUtils.applyAttributeSelectionFilter(testData, attSel);
            }
        }
    }
    // build classifier
    Classifier cl = WekaUtils.getClassifier(learningMode, classificationArguments);
    // file to hold prediction results
    File evalOutput = WekaUtils.getFile(aContext, "", evaluationBin, AccessMode.READWRITE);
    // evaluation & prediction generation
    if (multiLabel) {
        // we don't need to build the classifier - meka does this internally
        Result r = WekaUtils.getEvaluationMultilabel(cl, trainData, testData, threshold);
        WekaUtils.writeMlResultToFile(new MultilabelResult(r.allTrueValues(), r.allPredictions(), threshold), evalOutput);
        testData = WekaUtils.getPredictionInstancesMultiLabel(testData, cl, WekaUtils.getMekaThreshold(threshold, r, trainData));
        testData = WekaUtils.addInstanceId(testData, copyTestData, true);
    } else {
        // train the classifier on the train set split - not necessary in multilabel setup,
        // but in single label setup
        cl.buildClassifier(trainData);
        weka.core.SerializationHelper.write(evalOutput.getAbsolutePath(), WekaUtils.getEvaluationSinglelabel(cl, trainData, testData));
        testData = WekaUtils.getPredictionInstancesSingleLabel(testData, cl);
        testData = WekaUtils.addInstanceId(testData, copyTestData, false);
    }
    // Write out the prediction - the data sink expects an .arff ending file so we game it a bit
    // and rename the file afterwards to .txt
    File predictionFile = WekaUtils.getFile(aContext, "", Constants.FILENAME_PREDICTIONS, AccessMode.READWRITE);
    File arffDummy = new File(predictionFile.getParent(), "prediction.arff");
    DataSink.write(arffDummy.getAbsolutePath(), testData);
    FileUtils.moveFile(arffDummy, predictionFile);
}
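In the single-label branch the serialized evaluation can later be restored and queried; a minimal sketch, assuming the file written to evalOutput holds a weka.classifiers.Evaluation and that its path is known.
// Restore the Evaluation object and read off standard Weka metrics; the path is an assumption.
Evaluation eval = (Evaluation) weka.core.SerializationHelper.read("target/evaluation.bin");
System.out.println("Correctly classified: " + eval.pctCorrect() + " %");
System.out.println(eval.toSummaryString());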