Use of weka.core.Instances in project dkpro-tc by dkpro.
Class WekaUtilTest, method makeOutcomeClassesCompatibleTest.
/**
 * Loads the train and test ARFF fixtures, passes them through
 * {@code WekaUtils.makeOutcomeClassesCompatible} and verifies that the resulting test data
 * reports exactly two class labels.
 *
 * @throws Exception if loading or converting the fixtures fails
 */
@Test
public void makeOutcomeClassesCompatibleTest() throws Exception {
    File trainArff = new File("src/test/resources/utils/train.arff");
    File testArff = new File("src/test/resources/utils/test.arff");

    Instances trainingSet = WekaUtils.getInstances(trainArff, false);
    Instances testSet = WekaUtils.getInstances(testArff, false);

    Instances aligned = WekaUtils.makeOutcomeClassesCompatible(trainingSet, testSet, false);

    // Print the labels for manual inspection, then check their count.
    System.out.println(WekaUtils.getClassLabels(aligned, false));
    assertEquals(2, WekaUtils.getClassLabels(aligned, false).size());
}
Use of weka.core.Instances in project dkpro-tc by dkpro.
Class WekaUtilTest, method tcInstanceToWekaInstanceRegressionTest.
/**
 * Converts two TC instances with different feature sets against the same five-attribute
 * header and verifies that both resulting Weka instances share that header and expose five
 * attributes. Both datasets are printed for manual inspection.
 *
 * @throws Exception if the conversion fails
 */
@Test
public void tcInstanceToWekaInstanceRegressionTest() throws Exception {
    // Header the TC instances are mapped onto; the attribute order is part of the fixture.
    ArrayList<Attribute> header = new ArrayList<Attribute>();
    header.add(new Attribute("feature5"));
    header.add(new Attribute("feature2"));
    header.add(new Attribute("feature4", Arrays.asList("val_1", "val_2")));
    header.add(new Attribute("feature1"));
    header.add(new Attribute("outcome"));
    Instances trainingData = new Instances("test", header, 0);

    Instance first = new Instance();
    first.addFeature(new Feature("feature1", 2, FeatureType.NUMERIC));
    first.addFeature(new Feature("feature2", 2, FeatureType.NUMERIC));
    first.addFeature(new Feature("feature3_{{", "a", FeatureType.STRING));

    Instance second = new Instance();
    second.addFeature(new Feature("feature1", 1, FeatureType.NUMERIC));
    second.addFeature(new Feature("feature4", "val_1", FeatureType.STRING));
    second.addFeature(new Feature("feature3_{{", "b", FeatureType.STRING));

    // NOTE(review): the trailing 'true' presumably selects regression mode - confirm in WekaUtils.
    weka.core.Instance convertedFirst = WekaUtils.tcInstanceToWekaInstance(first, trainingData, null, true);
    weka.core.Instance convertedSecond = WekaUtils.tcInstanceToWekaInstance(second, trainingData, null, true);

    assertEquals(true, convertedFirst.equalHeaders(convertedSecond));
    assertEquals(5, convertedFirst.numAttributes());

    // Attach each instance to its own dataset so that printing the dataset shows its values.
    convertedFirst.dataset().add(convertedFirst);
    convertedSecond.dataset().add(convertedSecond);
    System.out.println(convertedFirst.dataset() + "\n");
    System.out.println(convertedSecond.dataset() + "\n");
}
Use of weka.core.Instances in project dkpro-tc by dkpro.
Class WekaTestTask, method execute.
/**
 * Runs one train/test cycle: loads both ARFF files, optionally applies feature selection,
 * evaluates the configured classifier, stores the evaluation result and writes the
 * predictions (with instance ids re-attached) to the prediction file.
 *
 * @param aContext task context used to resolve all input and output files
 * @throws Exception if reading data, training, evaluating or writing results fails
 */
@Override
public void execute(TaskContext aContext) throws Exception {
    final boolean multiLabel = learningMode.equals(Constants.LM_MULTI_LABEL);

    File trainArff = WekaUtils.getFile(aContext, TEST_TASK_INPUT_KEY_TRAINING_DATA,
            Constants.FILENAME_DATA_IN_CLASSIFIER_FORMAT, AccessMode.READONLY);
    File testArff = WekaUtils.getFile(aContext, TEST_TASK_INPUT_KEY_TEST_DATA,
            Constants.FILENAME_DATA_IN_CLASSIFIER_FORMAT, AccessMode.READONLY);

    Instances trainData = WekaUtils.getInstances(trainArff, multiLabel);
    Instances testData = WekaUtils.getInstances(testArff, multiLabel);

    // Outcome classes are only aligned for non-regression experiments.
    final boolean regression = learningMode.equals(Constants.LM_REGRESSION);
    if (!regression) {
        testData = WekaUtils.makeOutcomeClassesCompatible(trainData, testData, multiLabel);
    }

    // Keep a copy that still carries the instance ids; they are re-attached to the predictions.
    Instances testDataWithIds = new Instances(testData);
    trainData = WekaUtils.removeInstanceId(trainData, multiLabel);
    testData = WekaUtils.removeInstanceId(testData, multiLabel);

    // Feature selection
    if (multiLabel) {
        if (attributeEvaluator != null && labelTransformationMethod != null && numLabelsToKeep > 0) {
            Remove selectionFilter = WekaUtils.featureSelectionMultilabel(aContext, trainData,
                    attributeEvaluator, labelTransformationMethod, numLabelsToKeep);
            if (applySelection) {
                Logger.getLogger(getClass()).info("APPLYING FEATURE SELECTION");
                trainData = WekaUtils.applyAttributeSelectionFilter(trainData, selectionFilter);
                testData = WekaUtils.applyAttributeSelectionFilter(testData, selectionFilter);
            }
        }
    } else if (featureSearcher != null && attributeEvaluator != null) {
        AttributeSelection selection = WekaUtils.featureSelectionSinglelabel(aContext, trainData,
                featureSearcher, attributeEvaluator);
        // The selection report is written regardless of whether the selection is applied.
        File selectionReport = WekaUtils.getFile(aContext, "", WekaTestTask.featureSelectionFile,
                AccessMode.READWRITE);
        FileUtils.writeStringToFile(selectionReport, selection.toResultsString(), "utf-8");
        if (applySelection) {
            Logger.getLogger(getClass()).info("APPLYING FEATURE SELECTION");
            trainData = selection.reduceDimensionality(trainData);
            testData = selection.reduceDimensionality(testData);
        }
    }

    // Classifier and the file that receives the evaluation result
    Classifier cl = WekaUtils.getClassifier(learningMode, classificationArguments);
    File evalOutput = WekaUtils.getFile(aContext, "", evaluationBin, AccessMode.READWRITE);

    // Evaluation and prediction generation
    if (!multiLabel) {
        // Single-label setup: the classifier has to be trained explicitly before evaluating.
        cl.buildClassifier(trainData);
        weka.core.SerializationHelper.write(evalOutput.getAbsolutePath(),
                WekaUtils.getEvaluationSinglelabel(cl, trainData, testData));
        testData = WekaUtils.getPredictionInstancesSingleLabel(testData, cl);
        testData = WekaUtils.addInstanceId(testData, testDataWithIds, false);
    } else {
        // Multi-label setup: no explicit build step - meka trains the classifier internally.
        Result r = WekaUtils.getEvaluationMultilabel(cl, trainData, testData, threshold);
        MultilabelResult mlResult = new MultilabelResult(r.allTrueValues(), r.allPredictions(), threshold);
        WekaUtils.writeMlResultToFile(mlResult, evalOutput);
        testData = WekaUtils.getPredictionInstancesMultiLabel(testData, cl,
                WekaUtils.getMekaThreshold(threshold, r, trainData));
        testData = WekaUtils.addInstanceId(testData, testDataWithIds, true);
    }

    // The data sink expects a file ending in .arff, so the predictions are first written to a
    // temporary "prediction.arff" next to the target and then moved to the real prediction file.
    File predictionFile = WekaUtils.getFile(aContext, "", Constants.FILENAME_PREDICTIONS,
            AccessMode.READWRITE);
    File arffDummy = new File(predictionFile.getParent(), "prediction.arff");
    DataSink.write(arffDummy.getAbsolutePath(), testData);
    FileUtils.moveFile(arffDummy, predictionFile);
}
Use of weka.core.Instances in project iobserve-analysis by research-iobserve.
Class TVectorQuantizationClustering, method printInstances.
/**
 * Prints the clustering results and then logs one line per centroid that lists every
 * attribute name together with the centroid's value for it.
 *
 * @param results clustering results whose metrics provide the centroids
 */
private void printInstances(final ClusteringResults results) {
    results.printClusteringResults();

    final Instances centroids = results.getClusteringMetrics().getCentroids();
    final int centroidCount = centroids.numInstances();

    for (int cluster = 0; cluster < centroidCount; cluster++) {
        final Instance centroid = centroids.instance(cluster);

        // Header and attribute entries are appended back to back, without separators.
        final StringBuilder message = new StringBuilder();
        message.append("***************************");
        message.append("Cluster ").append(cluster);
        message.append("***************************");

        for (int attr = 0; attr < centroid.numAttributes(); attr++) {
            message.append(centroids.attribute(attr).name())
                    .append(" : ")
                    .append(centroid.value(attr));
        }

        TVectorQuantizationClustering.LOGGER.info(message.toString());
    }
}
Use of weka.core.Instances in project iobserve-analysis by research-iobserve.
Class ClusterMerger, method execute.
/**
 * Builds one {@code Instances} object containing the first instance of every non-empty
 * cluster, prints it and sends it to the output port.
 *
 * <p>The attribute header of the result is copied from the first instance of the first
 * non-empty cluster, so a leading empty cluster no longer aborts the merge.
 *
 * @param clustering cluster id mapped to the (instance, value) pairs assigned to that cluster
 * @throws java.util.NoSuchElementException if the clustering contains no instance at all
 * @throws Exception if printing or sending the result fails
 *
 * @see teetime.framework.AbstractConsumerStage#execute(java.lang.Object)
 */
@Override
protected void execute(final Map<Integer, List<Pair<Instance, Double>>> clustering) throws Exception {
    // Find a cluster that actually has a member; its first instance serves as the attribute
    // template. Empty clusters are skipped here just as they are when filling the result below.
    List<Pair<Instance, Double>> firstNonEmpty = null;
    for (final List<Pair<Instance, Double>> members : clustering.values()) {
        if (!members.isEmpty()) {
            firstNonEmpty = members;
            break;
        }
    }
    if (firstNonEmpty == null) {
        // Without any instance there is no attribute header to build the result from.
        throw new java.util.NoSuchElementException(
                "Cannot merge clusters: the clustering contains no instances");
    }

    final Instance template = firstNonEmpty.get(0).getElement1();
    final FastVector attributes = new FastVector();
    for (int j = 0; j < template.numAttributes(); j++) {
        attributes.addElement(template.attribute(j));
    }

    // Represent every non-empty cluster by its first instance.
    final Instances result = new Instances("Clustering Result", attributes, clustering.size());
    for (final List<Pair<Instance, Double>> entry : clustering.values()) {
        if (!entry.isEmpty()) {
            result.add(entry.get(0).getElement1());
        }
    }

    this.printInstances(result);
    this.outputPort.send(result);
}
Aggregations