Search in sources :

Example 26 with Instances

use of weka.core.Instances in project dkpro-tc by dkpro.

the class WekaUtilTest method makeOutcomeClassesCompatibleTest.

/**
 * Loads the train and test ARFF fixtures, makes the outcome classes of the test set compatible
 * with the training set, and expects the resulting data to expose exactly two class labels.
 *
 * @throws Exception if any of the called helpers fails
 */
@Test
public void makeOutcomeClassesCompatibleTest() throws Exception {
    final File trainArff = new File("src/test/resources/utils/train.arff");
    final File testArff = new File("src/test/resources/utils/test.arff");

    final Instances trainingSet = WekaUtils.getInstances(trainArff, false);
    final Instances testSet = WekaUtils.getInstances(testArff, false);
    final Instances alignedTestSet = WekaUtils.makeOutcomeClassesCompatible(trainingSet, testSet, false);

    // Print the labels for manual inspection, then verify their count.
    System.out.println(WekaUtils.getClassLabels(alignedTestSet, false));
    final int labelCount = WekaUtils.getClassLabels(alignedTestSet, false).size();
    assertEquals(2, labelCount);
}
Also used : Instances(weka.core.Instances) File(java.io.File) Test(org.junit.Test)

Example 27 with Instances

use of weka.core.Instances in project dkpro-tc by dkpro.

the class WekaUtilTest method tcInstanceToWekaInstanceRegressionTest.

/**
 * Converts two TC instances against a five-attribute header and checks that both resulting Weka
 * instances share the same header and carry five attributes each.
 *
 * @throws Exception if the conversion fails
 */
@Test
public void tcInstanceToWekaInstanceRegressionTest() throws Exception {
    // First TC instance: two numeric features plus one string feature.
    final Instance firstTcInstance = new Instance();
    firstTcInstance.addFeature(new Feature("feature1", 2, FeatureType.NUMERIC));
    firstTcInstance.addFeature(new Feature("feature2", 2, FeatureType.NUMERIC));
    firstTcInstance.addFeature(new Feature("feature3_{{", "a", FeatureType.STRING));

    // Second TC instance: a different, partially overlapping feature set.
    final Instance secondTcInstance = new Instance();
    secondTcInstance.addFeature(new Feature("feature1", 1, FeatureType.NUMERIC));
    secondTcInstance.addFeature(new Feature("feature4", "val_1", FeatureType.STRING));
    secondTcInstance.addFeature(new Feature("feature3_{{", "b", FeatureType.STRING));

    // Header of the (empty) training data the instances are converted against.
    final ArrayList<Attribute> header = new ArrayList<>();
    header.add(new Attribute("feature5"));
    header.add(new Attribute("feature2"));
    header.add(new Attribute("feature4", Arrays.asList("val_1", "val_2")));
    header.add(new Attribute("feature1"));
    header.add(new Attribute("outcome"));
    final Instances emptyTrainingSet = new Instances("test", header, 0);

    final weka.core.Instance firstConverted =
            WekaUtils.tcInstanceToWekaInstance(firstTcInstance, emptyTrainingSet, null, true);
    final weka.core.Instance secondConverted =
            WekaUtils.tcInstanceToWekaInstance(secondTcInstance, emptyTrainingSet, null, true);

    final boolean headersMatch = firstConverted.equalHeaders(secondConverted);
    assertEquals(true, headersMatch);
    assertEquals(5, firstConverted.numAttributes());

    // Attach each converted instance to its dataset and dump the datasets for inspection.
    firstConverted.dataset().add(firstConverted);
    secondConverted.dataset().add(secondConverted);
    System.out.println(firstConverted.dataset() + "\n");
    System.out.println(secondConverted.dataset() + "\n");
}
Also used : Instances(weka.core.Instances) Instance(org.dkpro.tc.api.features.Instance) Attribute(weka.core.Attribute) ArrayList(java.util.ArrayList) Feature(org.dkpro.tc.api.features.Feature) Test(org.junit.Test)

Example 28 with Instances

use of weka.core.Instances in project dkpro-tc by dkpro.

the class WekaTestTask method execute.

/**
 * Runs the train/test step: loads the training and test data, optionally performs feature
 * selection, evaluates the classifier and writes the evaluation result and the predictions.
 *
 * <p>The order of the steps matters: a copy of the test data is taken before the instance ids
 * are removed so that the ids can be added back to the predictions at the end.
 *
 * @param aContext
 *            the task context used to resolve every input and output file
 * @throws Exception
 *             if any of the called helpers (file access, Weka/Meka calls) fails
 */
@Override
public void execute(TaskContext aContext) throws Exception {
    // true when the configured learning mode is the multi-label mode
    boolean multiLabel = learningMode.equals(Constants.LM_MULTI_LABEL);
    // resolve the read-only training and test input files
    File arffFileTrain = WekaUtils.getFile(aContext, TEST_TASK_INPUT_KEY_TRAINING_DATA, Constants.FILENAME_DATA_IN_CLASSIFIER_FORMAT, AccessMode.READONLY);
    File arffFileTest = WekaUtils.getFile(aContext, TEST_TASK_INPUT_KEY_TEST_DATA, Constants.FILENAME_DATA_IN_CLASSIFIER_FORMAT, AccessMode.READONLY);
    Instances trainData = WekaUtils.getInstances(arffFileTrain, multiLabel);
    Instances testData = WekaUtils.getInstances(arffFileTest, multiLabel);
    // do not balance in regression experiments
    if (!learningMode.equals(Constants.LM_REGRESSION)) {
        testData = WekaUtils.makeOutcomeClassesCompatible(trainData, testData, multiLabel);
    }
    // keep a copy that still contains the instance ids; it is passed to addInstanceId below
    Instances copyTestData = new Instances(testData);
    trainData = WekaUtils.removeInstanceId(trainData, multiLabel);
    testData = WekaUtils.removeInstanceId(testData, multiLabel);
    // FEATURE SELECTION
    if (!learningMode.equals(Constants.LM_MULTI_LABEL)) {
        // single-label case: only runs when both a searcher and an evaluator are configured
        if (featureSearcher != null && attributeEvaluator != null) {
            AttributeSelection attSel = WekaUtils.featureSelectionSinglelabel(aContext, trainData, featureSearcher, attributeEvaluator);
            // the selection report is written regardless of whether the selection is applied
            File file = WekaUtils.getFile(aContext, "", WekaTestTask.featureSelectionFile, AccessMode.READWRITE);
            FileUtils.writeStringToFile(file, attSel.toResultsString(), "utf-8");
            if (applySelection) {
                Logger.getLogger(getClass()).info("APPLYING FEATURE SELECTION");
                // the selection computed on the training data is applied to both data sets
                trainData = attSel.reduceDimensionality(trainData);
                testData = attSel.reduceDimensionality(testData);
            }
        }
    } else {
        // multi-label case: requires an evaluator, a label transformation method and a
        // positive numLabelsToKeep
        if (attributeEvaluator != null && labelTransformationMethod != null && numLabelsToKeep > 0) {
            Remove attSel = WekaUtils.featureSelectionMultilabel(aContext, trainData, attributeEvaluator, labelTransformationMethod, numLabelsToKeep);
            if (applySelection) {
                Logger.getLogger(getClass()).info("APPLYING FEATURE SELECTION");
                // the same filter is applied to both data sets
                trainData = WekaUtils.applyAttributeSelectionFilter(trainData, attSel);
                testData = WekaUtils.applyAttributeSelectionFilter(testData, attSel);
            }
        }
    }
    // build classifier
    Classifier cl = WekaUtils.getClassifier(learningMode, classificationArguments);
    // file to hold prediction results
    File evalOutput = WekaUtils.getFile(aContext, "", evaluationBin, AccessMode.READWRITE);
    // evaluation & prediction generation
    if (multiLabel) {
        // we don't need to build the classifier - meka does this
        // internally
        Result r = WekaUtils.getEvaluationMultilabel(cl, trainData, testData, threshold);
        WekaUtils.writeMlResultToFile(new MultilabelResult(r.allTrueValues(), r.allPredictions(), threshold), evalOutput);
        testData = WekaUtils.getPredictionInstancesMultiLabel(testData, cl, WekaUtils.getMekaThreshold(threshold, r, trainData));
        // add the instance ids back, taken from the copy made before they were removed
        testData = WekaUtils.addInstanceId(testData, copyTestData, true);
    } else {
        // train the classifier on the train set split - not necessary in multilabel setup, but
        // in single label setup
        cl.buildClassifier(trainData);
        // the single-label evaluation object is serialized to the evaluation output file
        weka.core.SerializationHelper.write(evalOutput.getAbsolutePath(), WekaUtils.getEvaluationSinglelabel(cl, trainData, testData));
        testData = WekaUtils.getPredictionInstancesSingleLabel(testData, cl);
        // add the instance ids back, taken from the copy made before they were removed
        testData = WekaUtils.addInstanceId(testData, copyTestData, false);
    }
    // Write out the prediction - the data sink expects an .arff ending file so we game it a bit
    // and rename the file afterwards to .txt
    File predictionFile = WekaUtils.getFile(aContext, "", Constants.FILENAME_PREDICTIONS, AccessMode.READWRITE);
    // temporary file placed in the same directory as the final prediction file
    File arffDummy = new File(predictionFile.getParent(), "prediction.arff");
    DataSink.write(arffDummy.getAbsolutePath(), testData);
    // NOTE(review): presumably predictionFile does not exist yet at this point - confirm,
    // since the move may fail if the destination is already present
    FileUtils.moveFile(arffDummy, predictionFile);
}
Also used : Instances(weka.core.Instances) AttributeSelection(weka.attributeSelection.AttributeSelection) MultilabelResult(org.dkpro.tc.ml.weka.util.MultilabelResult) Remove(weka.filters.unsupervised.attribute.Remove) Classifier(weka.classifiers.Classifier) File(java.io.File) Result(meka.core.Result)

Example 29 with Instances

use of weka.core.Instances in project iobserve-analysis by research-iobserve.

the class TVectorQuantizationClustering method printInstances.

/**
 * Prints the clustering results and then logs one line per centroid, consisting of a cluster
 * banner followed by every attribute name and value of that centroid.
 *
 * @param results
 *            the clustering results whose centroids are logged
 */
private void printInstances(final ClusteringResults results) {
    results.printClusteringResults();
    final Instances centroids = results.getClusteringMetrics().getCentroids();
    for (int i = 0; i < centroids.numInstances(); i++) {
        final Instance centroid = centroids.instance(i);
        // A StringBuilder avoids re-copying the whole message on every append in the loops.
        final StringBuilder logLine = new StringBuilder();
        logLine.append("***************************");
        logLine.append("Cluster ").append(i);
        logLine.append("***************************");
        // NOTE(review): attribute entries are appended without any separator between them,
        // exactly as before - confirm whether a delimiter was intended.
        for (int a = 0; a < centroid.numAttributes(); a++) {
            logLine.append(centroids.attribute(a).name()).append(" : ").append(centroid.value(a));
        }
        TVectorQuantizationClustering.LOGGER.info(logLine.toString());
    }
}
Also used : Instances(weka.core.Instances) Instance(weka.core.Instance)

Example 30 with Instances

use of weka.core.Instances in project iobserve-analysis by research-iobserve.

the class ClusterMerger method execute.

/**
 * Builds a new {@link Instances} object from the first instance of every non-empty cluster,
 * prints it and sends it to the output port.
 *
 * <p>The attribute layout is taken from the first instance of the first non-empty cluster. If
 * the clustering is empty or contains only empty clusters there is nothing to derive the
 * attributes from, so nothing is printed or sent.
 *
 * @param clustering
 *            the clusters, each a list of instance/value pairs
 * @throws Exception
 *             if printing or sending the result fails
 *
 * @see teetime.framework.AbstractConsumerStage#execute(java.lang.Object)
 */
@Override
protected void execute(final Map<Integer, List<Pair<Instance, Double>>> clustering) throws Exception {
    // Look up the attribute prototype in the first cluster that actually has a member; the
    // first map entry alone is not safe because it may be missing or empty.
    Instance prototype = null;
    for (final List<Pair<Instance, Double>> members : clustering.values()) {
        if (!members.isEmpty()) {
            prototype = members.get(0).getElement1();
            break;
        }
    }
    if (prototype == null) {
        // No cluster has a representative, so there is no result to build.
        return;
    }

    final FastVector attributes = new FastVector();
    for (int j = 0; j < prototype.numAttributes(); j++) {
        attributes.addElement(prototype.attribute(j));
    }

    // Simply pick the first instance of every non-empty cluster as its representative.
    final Instances result = new Instances("Clustering Result", attributes, clustering.size());
    for (final List<Pair<Instance, Double>> members : clustering.values()) {
        if (!members.isEmpty()) {
            result.add(members.get(0).getElement1());
        }
    }
    this.printInstances(result);
    this.outputPort.send(result);
}
Also used : Instances(weka.core.Instances) FastVector(weka.core.FastVector) Instance(weka.core.Instance) Pair(org.eclipse.net4j.util.collection.Pair)

Aggregations

Instances (weka.core.Instances): 31, Attribute (weka.core.Attribute): 12, ArrayList (java.util.ArrayList): 9, File (java.io.File): 8, Instance (org.dkpro.tc.api.features.Instance): 8, Test (org.junit.Test): 8, MultiLabelInstances (mulan.data.MultiLabelInstances): 7, IOException (java.io.IOException): 5, DenseInstance (weka.core.DenseInstance): 5, Instance (weka.core.Instance): 5, ArffSaver (weka.core.converters.ArffSaver): 5, Feature (org.dkpro.tc.api.features.Feature): 4, Classifier (weka.classifiers.Classifier): 3, FastVector (weka.core.FastVector): 3, SparseInstance (weka.core.SparseInstance): 3, HashMap (java.util.HashMap): 2, Result (meka.core.Result): 2, AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException): 2, TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException): 2, FeatureType (org.dkpro.tc.api.features.FeatureType): 2