Search in sources :

Example 21 with Instances

use of weka.core.Instances in project dkpro-tc by dkpro.

the class WekaDataWriter method writeClassifierFormat.

/**
 * Converts each given TC instance into a Weka instance and hands it to the incremental saver.
 * <p>
 * Every Weka instance is created with an initial weight of 1.0 (sparse or dense, depending on
 * {@code useSparse}), attached to the dataset returned by {@code initalConfiguration}, and
 * given its class value: parsed as a number when {@code isRegression} is set, otherwise taken
 * as the outcome string. The instance weight is only overridden when {@code applyWeighting} is
 * set.
 *
 * @param instances
 *            the TC instances to write
 * @throws AnalysisEngineProcessException
 *             wrapping any exception raised while converting or writing an instance
 */
@Override
public void writeClassifierFormat(Collection<Instance> instances) throws AnalysisEngineProcessException {
    try {
        Instances header = initalConfiguration(instances);
        for (Instance tcInstance : instances) {
            double[] values = getFeatureValues(attributeStore, tcInstance);
            weka.core.Instance converted = useSparse
                    ? new SparseInstance(1.0, values)
                    : new DenseInstance(1.0, values);
            // The dataset must be attached before the class value can be set.
            converted.setDataset(header);

            String outcome = tcInstance.getOutcome();
            if (!isRegression) {
                converted.setClassValue(outcome);
            } else {
                converted.setClassValue(Double.parseDouble(outcome));
            }

            Double weight = tcInstance.getWeight();
            if (applyWeighting) {
                converted.setWeight(weight);
            }

            // Written directly; no preprocessing filter is applied to the instance.
            saver.writeIncremental(converted);
        }
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
}
Also used : Instances(weka.core.Instances) DenseInstance(weka.core.DenseInstance) SparseInstance(weka.core.SparseInstance) Instance(org.dkpro.tc.api.features.Instance) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException) IOException(java.io.IOException) TextClassificationException(org.dkpro.tc.api.exception.TextClassificationException)

Example 22 with Instances

use of weka.core.Instances in project dkpro-tc by dkpro.

the class WekaArffTest method main.

/**
 * Loads the gzipped train and test ARFF files, trains a {@link NaiveBayes} classifier on the
 * training data and evaluates it on the test data.
 *
 * @param args
 *            command line arguments; not used
 * @throws Exception
 *             if loading the data, training the classifier or evaluating it fails
 */
public static void main(String[] args) throws Exception {
    File train = new File("src/main/resources/arff/manyInstances/train.arff.gz");
    File test = new File("src/main/resources/arff/manyInstances/test.arff.gz");
    Instances trainData = WekaUtils.getInstances(train, false);
    Instances testData = WekaUtils.getInstances(test, false);
    Classifier cl = new NaiveBayes();
    // no problems until here
    // The classifier has to be trained before it can be evaluated; evaluateModel only applies
    // the model to the test instances and does not build it.
    cl.buildClassifier(trainData);
    Evaluation eval = new Evaluation(trainData);
    eval.evaluateModel(cl, testData);
}
Also used : Instances(weka.core.Instances) NaiveBayes(weka.classifiers.bayes.NaiveBayes) Evaluation(weka.classifiers.Evaluation) Classifier(weka.classifiers.Classifier) File(java.io.File)

Example 23 with Instances

use of weka.core.Instances in project dkpro-tc by dkpro.

the class WekaResultsTest method testWekaResultsRegression.

/**
 * Trains an {@link SMOreg} regressor on the regression training data (instance id removed)
 * and checks the correlation coefficient obtained on the test data.
 */
@Test
public void testWekaResultsRegression() throws Exception {
    SMOreg regressor = new SMOreg();

    Instances training = WekaUtils.removeInstanceId(regressionTrainData, false);
    Instances testing = WekaUtils.removeInstanceId(regressionTestData, false);
    regressor.buildClassifier(training);

    Evaluation evaluation = WekaUtils.getEvaluationSinglelabel(regressor, training, testing);
    double correlation = evaluation.correlationCoefficient();
    assertEquals(0.45, correlation, 0.01);
}
Also used : Instances(weka.core.Instances) Evaluation(weka.classifiers.Evaluation) SMOreg(weka.classifiers.functions.SMOreg) Test(org.junit.Test)

Example 24 with Instances

use of weka.core.Instances in project dkpro-tc by dkpro.

the class WekaResultsTest method testWekaResultsMultiLabel.

/**
 * Trains a {@link BR} multi-label classifier with {@link J48} as base classifier and checks
 * the label count and the "Exact match" statistic of the evaluation result.
 */
@Test
public void testWekaResultsMultiLabel() throws Exception {
    BR classifier = new BR();
    String[] options = { "-W", J48.class.getName() };
    classifier.setOptions(options);

    // The test data is aligned with the training outcomes before the instance ids are removed.
    Instances compatibleTest = WekaUtils.makeOutcomeClassesCompatible(multiLabelTrainData, multiLabelTestData,
            true);
    Instances training = WekaUtils.removeInstanceId(multiLabelTrainData, true);
    Instances testing = WekaUtils.removeInstanceId(compatibleTest, true);
    classifier.buildClassifier(training);

    Result result = WekaUtils.getEvaluationMultilabel(classifier, training, testing, "0.2");
    assertEquals(16.0, result.L, 0.01);

    Double exactMatch = (Double) Result.getStats(result, "1").get("Exact match");
    assertEquals(0.0, exactMatch, 0.01);
}
Also used : BR(meka.classifiers.multilabel.BR) Instances(weka.core.Instances) J48(weka.classifiers.trees.J48) Result(meka.core.Result) Test(org.junit.Test)

Example 25 with Instances

use of weka.core.Instances in project dkpro-tc by dkpro.

the class WekaUtilTest method tcInstanceToMekaInstanceTest.

/**
 * Converts two TC instances with partially different feature sets into Meka instances against
 * a shared attribute header and checks that both results have equal headers and the expected
 * number of attributes. The resulting datasets are printed to standard output.
 */
@Test
public void tcInstanceToMekaInstanceTest() throws Exception {
    List<String> outcomeValues = Arrays.asList("outc_1", "outc_2", "outc_3");

    Instance first = new Instance();
    first.addFeature(new Feature("feature1", 2, FeatureType.NUMERIC));
    first.addFeature(new Feature("feature2", 2, FeatureType.NUMERIC));
    first.addFeature(new Feature("feature3_{{", "a", FeatureType.STRING));

    Instance second = new Instance();
    second.addFeature(new Feature("feature1", 1, FeatureType.NUMERIC));
    second.addFeature(new Feature("feature4", "val_1", FeatureType.STRING));
    second.addFeature(new Feature("feature3_{{", "b", FeatureType.STRING));

    ArrayList<Attribute> header = new ArrayList<Attribute>();
    // One binary attribute per outcome, in the order of the outcome list.
    for (String outcomeName : outcomeValues) {
        header.add(new Attribute(outcomeName, Arrays.asList("0", "1")));
    }
    header.add(new Attribute("feature5"));
    header.add(new Attribute("feature2"));
    header.add(new Attribute("feature4", Arrays.asList("val_1", "val_2")));
    header.add(new Attribute("feature1"));
    Instances trainingData = new Instances("test", header, 0);

    weka.core.Instance converted1 = WekaUtils.tcInstanceToMekaInstance(first, trainingData, outcomeValues);
    weka.core.Instance converted2 = WekaUtils.tcInstanceToMekaInstance(second, trainingData, outcomeValues);

    boolean sameHeaders = converted1.equalHeaders(converted2);
    assertEquals(true, sameHeaders);
    assertEquals(7, converted1.numAttributes());

    converted1.dataset().add(converted1);
    converted2.dataset().add(converted2);
    System.out.println(converted1.dataset() + "\n");
    System.out.println(converted2.dataset() + "\n");
}
Also used : Instances(weka.core.Instances) Instance(org.dkpro.tc.api.features.Instance) Attribute(weka.core.Attribute) ArrayList(java.util.ArrayList) Feature(org.dkpro.tc.api.features.Feature) Test(org.junit.Test)

Aggregations

Instances (weka.core.Instances)31 Attribute (weka.core.Attribute)12 ArrayList (java.util.ArrayList)9 File (java.io.File)8 Instance (org.dkpro.tc.api.features.Instance)8 Test (org.junit.Test)8 MultiLabelInstances (mulan.data.MultiLabelInstances)7 IOException (java.io.IOException)5 DenseInstance (weka.core.DenseInstance)5 Instance (weka.core.Instance)5 ArffSaver (weka.core.converters.ArffSaver)5 Feature (org.dkpro.tc.api.features.Feature)4 Classifier (weka.classifiers.Classifier)3 FastVector (weka.core.FastVector)3 SparseInstance (weka.core.SparseInstance)3 HashMap (java.util.HashMap)2 Result (meka.core.Result)2 AnalysisEngineProcessException (org.apache.uima.analysis_engine.AnalysisEngineProcessException)2 TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException)2 FeatureType (org.dkpro.tc.api.features.FeatureType)2