Search in sources :

Example 16 with NominalAttribute

use of smile.data.NominalAttribute in project smile by haifengl.

the class BIRCHTest method testUSPS.

/**
     * Test of learn method, of class BIRCH.
     */
@Test
public void testUSPS() {
    System.out.println("USPS");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));
        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);
        double[][] testx = test.toArray(new double[test.size()][]);
        int[] testy = test.toArray(new int[test.size()]);
        BIRCH birch = new BIRCH(x[0].length, 5, 16.0);
        for (int i = 0; i < 20; i++) {
            int[] index = Math.permutate(x.length);
            for (int j = 0; j < x.length; j++) {
                birch.add(x[index[j]]);
            }
        }
        birch.partition(10);
        AdjustedRandIndex ari = new AdjustedRandIndex();
        RandIndex rand = new RandIndex();
        int[] p = new int[x.length];
        for (int i = 0; i < x.length; i++) {
            p[i] = birch.predict(x[i]);
        }
        double r = rand.measure(y, p);
        double r2 = ari.measure(y, p);
        System.out.format("Training rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2);
        assertTrue(r > 0.65);
        assertTrue(r2 > 0.20);
        p = new int[testx.length];
        for (int i = 0; i < testx.length; i++) {
            p[i] = birch.predict(testx[i]);
        }
        r = rand.measure(testy, p);
        r2 = ari.measure(testy, p);
        System.out.format("Testing rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2);
        assertTrue(r > 0.65);
        assertTrue(r2 > 0.20);
    } catch (Exception ex) {
        System.err.println(ex);
    }
}
Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) AttributeDataset(smile.data.AttributeDataset) NominalAttribute(smile.data.NominalAttribute) RandIndex(smile.validation.RandIndex) AdjustedRandIndex(smile.validation.AdjustedRandIndex) AdjustedRandIndex(smile.validation.AdjustedRandIndex) Test(org.junit.Test)

Example 17 with NominalAttribute

use of smile.data.NominalAttribute in project smile by haifengl.

the class NeuralNetworkTest method testUSPS.

/**
     * Test of learn method, of class NeuralNetwork.
     */
@Test
public void testUSPS() {
    System.out.println("USPS");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));
        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);
        double[][] testx = test.toArray(new double[test.size()][]);
        int[] testy = test.toArray(new int[test.size()]);
        int p = x[0].length;
        double[] mu = Math.colMean(x);
        double[] sd = Math.colSd(x);
        for (int i = 0; i < x.length; i++) {
            for (int j = 0; j < p; j++) {
                x[i][j] = (x[i][j] - mu[j]) / sd[j];
            }
        }
        for (int i = 0; i < testx.length; i++) {
            for (int j = 0; j < p; j++) {
                testx[i][j] = (testx[i][j] - mu[j]) / sd[j];
            }
        }
        NeuralNetwork net = new NeuralNetwork(NeuralNetwork.ErrorFunction.CROSS_ENTROPY, NeuralNetwork.ActivationFunction.SOFTMAX, x[0].length, 40, Math.max(y) + 1);
        for (int j = 0; j < 30; j++) {
            net.learn(x, y);
        }
        int error = 0;
        for (int i = 0; i < testx.length; i++) {
            if (net.predict(testx[i]) != testy[i]) {
                error++;
            }
        }
        System.out.format("USPS error rate = %.2f%%%n", 100.0 * error / testx.length);
    } catch (Exception ex) {
        System.err.println(ex);
    }
}
Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) AttributeDataset(smile.data.AttributeDataset) NominalAttribute(smile.data.NominalAttribute) Test(org.junit.Test)

Example 18 with NominalAttribute

use of smile.data.NominalAttribute in project smile by haifengl.

the class DecisionTreeTest method testUSPS.

/**
     * Test of learn method, of class DecisionTree.
     */
@Test
public void testUSPS() {
    System.out.println("USPS");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));
        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);
        double[][] testx = test.toArray(new double[test.size()][]);
        int[] testy = test.toArray(new int[test.size()]);
        DecisionTree tree = new DecisionTree(x, y, 350, DecisionTree.SplitRule.ENTROPY);
        int error = 0;
        for (int i = 0; i < testx.length; i++) {
            if (tree.predict(testx[i]) != testy[i]) {
                error++;
            }
        }
        System.out.format("USPS error rate = %.2f%%%n", 100.0 * error / testx.length);
        assertEquals(328, error);
    } catch (Exception ex) {
        System.err.println(ex);
    }
}
Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) AttributeDataset(smile.data.AttributeDataset) NominalAttribute(smile.data.NominalAttribute) Test(org.junit.Test)

Example 19 with NominalAttribute

use of smile.data.NominalAttribute in project smile by haifengl.

the class DecisionTreeTest method testUSPSNominal.

/**
     * Test of learn method, of class DecisionTree.
     */
@Test
public void testUSPSNominal() {
    System.out.println("USPS nominal");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));
        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);
        double[][] testx = test.toArray(new double[test.size()][]);
        int[] testy = test.toArray(new int[test.size()]);
        for (double[] xi : x) {
            for (int i = 0; i < xi.length; i++) {
                xi[i] = Math.round(255 * (xi[i] + 1) / 2);
            }
        }
        for (double[] xi : testx) {
            for (int i = 0; i < xi.length; i++) {
                xi[i] = Math.round(127 + 127 * xi[i]);
            }
        }
        Attribute[] attributes = new Attribute[256];
        String[] values = new String[attributes.length];
        for (int i = 0; i < attributes.length; i++) {
            values[i] = String.valueOf(i);
        }
        for (int i = 0; i < attributes.length; i++) {
            attributes[i] = new NominalAttribute("V" + i, values);
        }
        DecisionTree tree = new DecisionTree(attributes, x, y, 350, 2, DecisionTree.SplitRule.ENTROPY);
        int error = 0;
        for (int i = 0; i < testx.length; i++) {
            if (tree.predict(testx[i]) != testy[i]) {
                error++;
            }
        }
        System.out.format("USPS error rate = %.2f%%%n", 100.0 * error / testx.length);
        double[] importance = tree.importance();
        int[] index = QuickSort.sort(importance);
        for (int i = importance.length; i-- > 0; ) {
            System.out.format("%s importance is %.4f%n", train.attributes()[index[i]], importance[i]);
        }
        assertEquals(324, error);
    } catch (Exception ex) {
        System.err.println(ex);
    }
}
Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) AttributeDataset(smile.data.AttributeDataset) Attribute(smile.data.Attribute) NominalAttribute(smile.data.NominalAttribute) NominalAttribute(smile.data.NominalAttribute) Test(org.junit.Test)

Example 20 with NominalAttribute

use of smile.data.NominalAttribute in project smile by haifengl.

the class GradientTreeBoostTest method testUSPS.

/**
     * Test of learn method, of class GradientTreeBoost.
     */
@Test
public void testUSPS() {
    System.out.println("USPS");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));
        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);
        double[][] testx = test.toArray(new double[test.size()][]);
        int[] testy = test.toArray(new int[test.size()]);
        GradientTreeBoost boost = new GradientTreeBoost(train.attributes(), x, y, 100);
        int error = 0;
        for (int i = 0; i < testx.length; i++) {
            if (boost.predict(testx[i]) != testy[i]) {
                error++;
            }
        }
        System.out.format("Gradient Tree Boost error rate = %.2f%%%n", 100.0 * error / testx.length);
        double[] accuracy = boost.test(testx, testy);
        for (int i = 1; i <= accuracy.length; i++) {
            System.out.format("%d trees accuracy = %.2f%%%n", i, 100.0 * accuracy[i - 1]);
        }
        double[] importance = boost.importance();
        int[] index = QuickSort.sort(importance);
        for (int i = importance.length; i-- > 0; ) {
            System.out.format("%s importance is %.4f%n", train.attributes()[index[i]], importance[i]);
        }
    } catch (Exception ex) {
        System.err.println(ex);
    }
}
Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) AttributeDataset(smile.data.AttributeDataset) NominalAttribute(smile.data.NominalAttribute) Test(org.junit.Test)

Aggregations

NominalAttribute (smile.data.NominalAttribute)54 DelimitedTextParser (smile.data.parser.DelimitedTextParser)49 AttributeDataset (smile.data.AttributeDataset)48 Test (org.junit.Test)46 AdjustedRandIndex (smile.validation.AdjustedRandIndex)14 RandIndex (smile.validation.RandIndex)14 Attribute (smile.data.Attribute)12 ArrayList (java.util.ArrayList)7 EuclideanDistance (smile.math.distance.EuclideanDistance)5 IOException (java.io.IOException)4 BufferedReader (java.io.BufferedReader)3 ParseException (java.text.ParseException)3 LDA (smile.classification.LDA)3 InputStreamReader (java.io.InputStreamReader)2 DateAttribute (smile.data.DateAttribute)2 NumericAttribute (smile.data.NumericAttribute)2 StringAttribute (smile.data.StringAttribute)2 PlotCanvas (smile.plot.PlotCanvas)2 Accuracy (smile.validation.Accuracy)2 BorderLayout (java.awt.BorderLayout)1