Search in sources :

Example 16 with DelimitedTextParser

use of smile.data.parser.DelimitedTextParser in project smile by haifengl.

the class RandomForestTest method testUSPSNominal.

/**
 * Trains a RandomForest on the USPS training set with every feature treated as a
 * 256-level nominal attribute, then checks the number of misclassified test samples.
 *
 * <p>Any exception raised while loading the data or training now fails the test
 * instead of being printed and ignored, so the test can no longer pass without
 * having executed its assertion.
 */
@Test
public void testUSPSNominal() {
    System.out.println("USPS nominal");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));
        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);
        double[][] testx = test.toArray(new double[test.size()][]);
        int[] testy = test.toArray(new int[test.size()]);

        // Quantize training and test features with the same mapping so both index
        // the same nominal levels. Assumes raw values lie in [-1, 1] -- TODO confirm.
        for (double[][] data : new double[][][] {x, testx}) {
            for (double[] row : data) {
                for (int i = 0; i < row.length; i++) {
                    row[i] = Math.round(255 * (row[i] + 1) / 2);
                }
            }
        }

        // One nominal attribute per feature, each sharing the level names "0".."255".
        Attribute[] attributes = new Attribute[256];
        String[] values = new String[attributes.length];
        for (int i = 0; i < attributes.length; i++) {
            values[i] = String.valueOf(i);
        }
        for (int i = 0; i < attributes.length; i++) {
            attributes[i] = new NominalAttribute("V" + i, values);
        }

        // The last argument (200) is presumably the number of trees -- verify against RandomForest.
        RandomForest forest = new RandomForest(attributes, x, y, 200);
        int error = 0;
        for (int i = 0; i < testx.length; i++) {
            if (forest.predict(testx[i]) != testy[i]) {
                error++;
            }
        }
        System.out.println(error);
        System.out.format("USPS OOB error rate = %.2f%%%n", 100.0 * forest.error());
        System.out.format("USPS error rate = %.2f%%%n", 100.0 * error / testx.length);

        double[] accuracy = forest.test(testx, testy);
        for (int i = 1; i <= accuracy.length; i++) {
            System.out.format("%d trees accuracy = %.2f%%%n", i, 100.0 * accuracy[i - 1]);
        }

        // NOTE(review): this pairing of index[i] with importance[i] is only right if
        // QuickSort.sort reorders `importance` in place and returns the original
        // positions -- confirm against QuickSort.
        double[] importance = forest.importance();
        int[] index = QuickSort.sort(importance);
        for (int i = importance.length; i-- > 0; ) {
            System.out.format("%s importance is %.4f%n", train.attributes()[index[i]], importance[i]);
        }
        assertTrue(error <= 150);
    } catch (Exception ex) {
        // Previously printed and ignored, which let the test pass vacuously.
        throw new AssertionError("USPS nominal RandomForest test could not run: " + ex, ex);
    }
}
Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) AttributeDataset(smile.data.AttributeDataset) Attribute(smile.data.Attribute) NominalAttribute(smile.data.NominalAttribute) Test(org.junit.Test)

Example 17 with DelimitedTextParser

use of smile.data.parser.DelimitedTextParser in project smile by haifengl.

the class BIRCHTest method testUSPS.

/**
 * Builds a BIRCH model from repeated shuffled passes over the USPS training set,
 * partitions it into 10 clusters, and checks the Rand index and adjusted Rand index
 * of the cluster assignments against the class labels on both training and test data.
 *
 * <p>Any exception raised while loading the data or clustering now fails the test
 * instead of being printed and ignored.
 */
@Test
public void testUSPS() {
    System.out.println("USPS");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));
        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);
        double[][] testx = test.toArray(new double[test.size()][]);
        int[] testy = test.toArray(new int[test.size()]);

        BIRCH birch = new BIRCH(x[0].length, 5, 16.0);
        // Feed the whole training set 20 times, each pass in a fresh order from Math.permutate.
        for (int pass = 0; pass < 20; pass++) {
            int[] order = Math.permutate(x.length);
            for (int j = 0; j < x.length; j++) {
                birch.add(x[order[j]]);
            }
        }
        birch.partition(10);

        AdjustedRandIndex ari = new AdjustedRandIndex();
        RandIndex rand = new RandIndex();

        // Agreement between cluster assignments and labels on the training data.
        int[] p = new int[x.length];
        for (int i = 0; i < x.length; i++) {
            p[i] = birch.predict(x[i]);
        }
        double r = rand.measure(y, p);
        double r2 = ari.measure(y, p);
        System.out.format("Training rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2);
        assertTrue(r > 0.65);
        assertTrue(r2 > 0.20);

        // Same measurement on the held-out test data.
        p = new int[testx.length];
        for (int i = 0; i < testx.length; i++) {
            p[i] = birch.predict(testx[i]);
        }
        r = rand.measure(testy, p);
        r2 = ari.measure(testy, p);
        System.out.format("Testing rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2);
        assertTrue(r > 0.65);
        assertTrue(r2 > 0.20);
    } catch (Exception ex) {
        // Previously printed and ignored, which let the test pass vacuously.
        throw new AssertionError("USPS BIRCH test could not run: " + ex, ex);
    }
}
Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) AttributeDataset(smile.data.AttributeDataset) NominalAttribute(smile.data.NominalAttribute) RandIndex(smile.validation.RandIndex) AdjustedRandIndex(smile.validation.AdjustedRandIndex) Test(org.junit.Test)

Example 18 with DelimitedTextParser

use of smile.data.parser.DelimitedTextParser in project smile by haifengl.

the class NeuralNetworkTest method testUSPS.

/**
 * Trains a NeuralNetwork (cross-entropy error, softmax output, 40 units in the middle
 * layer) on standardized USPS data for 30 calls to {@code learn} and prints the
 * test-set error rate.
 *
 * <p>Any exception raised while loading the data or training now fails the test
 * instead of being printed and ignored.
 *
 * <p>NOTE(review): this test only prints the error rate; it asserts nothing about it.
 * Consider adding a threshold once an acceptable error rate is agreed.
 */
@Test
public void testUSPS() {
    System.out.println("USPS");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));
        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);
        double[][] testx = test.toArray(new double[test.size()][]);
        int[] testy = test.toArray(new int[test.size()]);

        // Standardize both sets with the per-column statistics of the TRAINING data.
        // NOTE(review): a column with sd == 0 would yield NaN/Infinity here; there is
        // no guard -- confirm the data has no constant columns.
        int p = x[0].length;
        double[] mu = Math.colMean(x);
        double[] sd = Math.colSd(x);
        for (double[][] data : new double[][][] {x, testx}) {
            for (double[] row : data) {
                for (int j = 0; j < p; j++) {
                    row[j] = (row[j] - mu[j]) / sd[j];
                }
            }
        }

        // Output width is max label + 1, i.e. labels are used as 0-based class indices.
        NeuralNetwork net = new NeuralNetwork(NeuralNetwork.ErrorFunction.CROSS_ENTROPY, NeuralNetwork.ActivationFunction.SOFTMAX, p, 40, Math.max(y) + 1);
        for (int epoch = 0; epoch < 30; epoch++) {
            net.learn(x, y);
        }

        int error = 0;
        for (int i = 0; i < testx.length; i++) {
            if (net.predict(testx[i]) != testy[i]) {
                error++;
            }
        }
        System.out.format("USPS error rate = %.2f%%%n", 100.0 * error / testx.length);
    } catch (Exception ex) {
        // Previously printed and ignored, which let the test pass vacuously.
        throw new AssertionError("USPS NeuralNetwork test could not run: " + ex, ex);
    }
}
Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) AttributeDataset(smile.data.AttributeDataset) NominalAttribute(smile.data.NominalAttribute) Test(org.junit.Test)

Example 19 with DelimitedTextParser

use of smile.data.parser.DelimitedTextParser in project smile by haifengl.

the class DecisionTreeTest method testUSPS.

/**
 * Trains a DecisionTree with the entropy split rule on the raw USPS features and
 * checks that exactly 328 test samples are misclassified.
 *
 * <p>Any exception raised while loading the data or training now fails the test
 * instead of being printed and ignored, so the test can no longer pass without
 * having executed its assertion.
 */
@Test
public void testUSPS() {
    System.out.println("USPS");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));
        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);
        double[][] testx = test.toArray(new double[test.size()][]);
        int[] testy = test.toArray(new int[test.size()]);

        // The third argument (350) is presumably the tree-size limit -- verify against DecisionTree.
        DecisionTree tree = new DecisionTree(x, y, 350, DecisionTree.SplitRule.ENTROPY);

        int error = 0;
        for (int i = 0; i < testx.length; i++) {
            if (tree.predict(testx[i]) != testy[i]) {
                error++;
            }
        }
        System.out.format("USPS error rate = %.2f%%%n", 100.0 * error / testx.length);
        assertEquals(328, error);
    } catch (Exception ex) {
        // Previously printed and ignored, which let the test pass vacuously.
        throw new AssertionError("USPS DecisionTree test could not run: " + ex, ex);
    }
}
Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) AttributeDataset(smile.data.AttributeDataset) NominalAttribute(smile.data.NominalAttribute) Test(org.junit.Test)

Example 20 with DelimitedTextParser

use of smile.data.parser.DelimitedTextParser in project smile by haifengl.

the class DecisionTreeTest method testUSPSNominal.

/**
 * Trains a DecisionTree with the entropy split rule on the USPS data with every
 * feature treated as a 256-level nominal attribute, prints the variable importance,
 * and checks that exactly 324 test samples are misclassified.
 *
 * <p>Any exception raised while loading the data or training now fails the test
 * instead of being printed and ignored, so the test can no longer pass without
 * having executed its assertion.
 */
@Test
public void testUSPSNominal() {
    System.out.println("USPS nominal");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));
        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);
        double[][] testx = test.toArray(new double[test.size()][]);
        int[] testy = test.toArray(new int[test.size()]);

        // Quantize the training features: round(255 * (v + 1) / 2).
        for (double[] row : x) {
            for (int i = 0; i < row.length; i++) {
                row[i] = Math.round(255 * (row[i] + 1) / 2);
            }
        }
        // NOTE(review): the test features use a DIFFERENT mapping, round(127 + 127 * v),
        // so the same raw value can land on a different nominal level than in training.
        // This looks unintentional, but the expected count below (324) was obtained
        // with these exact transforms; aligning them requires re-deriving that count.
        for (double[] row : testx) {
            for (int i = 0; i < row.length; i++) {
                row[i] = Math.round(127 + 127 * row[i]);
            }
        }

        // One nominal attribute per feature, each sharing the level names "0".."255".
        Attribute[] attributes = new Attribute[256];
        String[] values = new String[attributes.length];
        for (int i = 0; i < attributes.length; i++) {
            values[i] = String.valueOf(i);
        }
        for (int i = 0; i < attributes.length; i++) {
            attributes[i] = new NominalAttribute("V" + i, values);
        }

        // Arguments 350 and 2 are presumably size limits for the tree -- verify against DecisionTree.
        DecisionTree tree = new DecisionTree(attributes, x, y, 350, 2, DecisionTree.SplitRule.ENTROPY);

        int error = 0;
        for (int i = 0; i < testx.length; i++) {
            if (tree.predict(testx[i]) != testy[i]) {
                error++;
            }
        }
        System.out.format("USPS error rate = %.2f%%%n", 100.0 * error / testx.length);

        // NOTE(review): this pairing of index[i] with importance[i] is only right if
        // QuickSort.sort reorders `importance` in place and returns the original
        // positions -- confirm against QuickSort.
        double[] importance = tree.importance();
        int[] index = QuickSort.sort(importance);
        for (int i = importance.length; i-- > 0; ) {
            System.out.format("%s importance is %.4f%n", train.attributes()[index[i]], importance[i]);
        }
        assertEquals(324, error);
    } catch (Exception ex) {
        // Previously printed and ignored, which let the test pass vacuously.
        throw new AssertionError("USPS nominal DecisionTree test could not run: " + ex, ex);
    }
}
Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) AttributeDataset(smile.data.AttributeDataset) Attribute(smile.data.Attribute) NominalAttribute(smile.data.NominalAttribute) Test(org.junit.Test)

Aggregations

DelimitedTextParser (smile.data.parser.DelimitedTextParser)55 NominalAttribute (smile.data.NominalAttribute)49 AttributeDataset (smile.data.AttributeDataset)48 Test (org.junit.Test)45 AdjustedRandIndex (smile.validation.AdjustedRandIndex)14 RandIndex (smile.validation.RandIndex)14 Attribute (smile.data.Attribute)8 EuclideanDistance (smile.math.distance.EuclideanDistance)5 ArrayList (java.util.ArrayList)4 LDA (smile.classification.LDA)3 PlotCanvas (smile.plot.PlotCanvas)2 Accuracy (smile.validation.Accuracy)2 BorderLayout (java.awt.BorderLayout)1 IOException (java.io.IOException)1 ParseException (java.text.ParseException)1 JFrame (javax.swing.JFrame)1 ClassifierTrainer (smile.classification.ClassifierTrainer)1 CompleteLinkage (smile.clustering.linkage.CompleteLinkage)1 SingleLinkage (smile.clustering.linkage.SingleLinkage)1 UPGMALinkage (smile.clustering.linkage.UPGMALinkage)1