Search in sources :

Example 21 with NominalAttribute

use of smile.data.NominalAttribute in project smile by haifengl.

the class GradientTreeBoostTest method testUSPS2.

/**
     * Test of learn method, of class GradientTreeBoost.
     */
@Test
public void testUSPS2() {
    System.out.println("USPS 2 classes");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));
        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);
        double[][] testx = test.toArray(new double[test.size()][]);
        int[] testy = test.toArray(new int[test.size()]);
        for (int i = 0; i < y.length; i++) {
            if (y[i] != 0) {
                y[i] = 1;
            }
        }
        for (int i = 0; i < testy.length; i++) {
            if (testy[i] != 0) {
                testy[i] = 1;
            }
        }
        GradientTreeBoost boost = new GradientTreeBoost(train.attributes(), x, y, 100);
        int error = 0;
        for (int i = 0; i < testx.length; i++) {
            if (boost.predict(testx[i]) != testy[i]) {
                error++;
            }
        }
        System.out.format("Gradient Tree Boost error rate = %.2f%%%n", 100.0 * error / testx.length);
        double[] accuracy = boost.test(testx, testy);
        for (int i = 1; i <= accuracy.length; i++) {
            System.out.format("%d trees accuracy = %.2f%%%n", i, 100.0 * accuracy[i - 1]);
        }
        double[] importance = boost.importance();
        int[] index = QuickSort.sort(importance);
        for (int i = importance.length; i-- > 0; ) {
            System.out.format("%s importance is %.4f%n", train.attributes()[index[i]], importance[i]);
        }
    } catch (Exception ex) {
        System.err.println(ex);
    }
}
Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) AttributeDataset(smile.data.AttributeDataset) NominalAttribute(smile.data.NominalAttribute) Test(org.junit.Test)

Example 22 with NominalAttribute

use of smile.data.NominalAttribute in project smile by haifengl.

the class CLARANSTest method testUSPS.

/**
     * Test of learn method, of class CLARANS.
     */
@Test
public void testUSPS() {
    System.out.println("USPS");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));
        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);
        double[][] testx = test.toArray(new double[test.size()][]);
        int[] testy = test.toArray(new int[test.size()]);
        AdjustedRandIndex ari = new AdjustedRandIndex();
        RandIndex rand = new RandIndex();
        CLARANS<double[]> clarans = new CLARANS<>(x, new EuclideanDistance(), 10, 50, 8);
        double r = rand.measure(y, clarans.getClusterLabel());
        double r2 = ari.measure(y, clarans.getClusterLabel());
        System.out.format("Training rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2);
        assertTrue(r > 0.8);
        assertTrue(r2 > 0.28);
        int[] p = new int[testx.length];
        for (int i = 0; i < testx.length; i++) {
            p[i] = clarans.predict(testx[i]);
        }
        r = rand.measure(testy, p);
        r2 = ari.measure(testy, p);
        System.out.format("Testing rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2);
        assertTrue(r > 0.8);
        assertTrue(r2 > 0.25);
    } catch (Exception ex) {
        System.err.println(ex);
    }
}
Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) AttributeDataset(smile.data.AttributeDataset) RandIndex(smile.validation.RandIndex) AdjustedRandIndex(smile.validation.AdjustedRandIndex) EuclideanDistance(smile.math.distance.EuclideanDistance) NominalAttribute(smile.data.NominalAttribute) AdjustedRandIndex(smile.validation.AdjustedRandIndex) Test(org.junit.Test)

Example 23 with NominalAttribute

use of smile.data.NominalAttribute in project smile by haifengl.

the class KMeansTest method testUSPS.

/**
     * Test of learn method, of class KMeans.
     */
@Test
public void testUSPS() {
    System.out.println("USPS");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));
        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);
        double[][] testx = test.toArray(new double[test.size()][]);
        int[] testy = test.toArray(new int[test.size()]);
        KMeans kmeans = new KMeans(x, 10, 100, 4);
        AdjustedRandIndex ari = new AdjustedRandIndex();
        RandIndex rand = new RandIndex();
        double r = rand.measure(y, kmeans.getClusterLabel());
        double r2 = ari.measure(y, kmeans.getClusterLabel());
        System.out.format("Training rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2);
        assertTrue(r > 0.85);
        assertTrue(r2 > 0.45);
        int[] p = new int[testx.length];
        for (int i = 0; i < testx.length; i++) {
            p[i] = kmeans.predict(testx[i]);
        }
        r = rand.measure(testy, p);
        r2 = ari.measure(testy, p);
        System.out.format("Testing rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2);
        assertTrue(r > 0.85);
        assertTrue(r2 > 0.45);
    } catch (Exception ex) {
        System.err.println(ex);
    }
}
Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) AttributeDataset(smile.data.AttributeDataset) NominalAttribute(smile.data.NominalAttribute) AdjustedRandIndex(smile.validation.AdjustedRandIndex) RandIndex(smile.validation.RandIndex) AdjustedRandIndex(smile.validation.AdjustedRandIndex) Test(org.junit.Test)

Example 24 with NominalAttribute

use of smile.data.NominalAttribute in project smile by haifengl.

the class MECTest method testUSPS.

/**
     * Test of learn method, of class MEC.
     */
@Test
public void testUSPS() {
    System.out.println("USPS");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));
        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);
        double[][] testx = test.toArray(new double[test.size()][]);
        int[] testy = test.toArray(new int[test.size()]);
        AdjustedRandIndex ari = new AdjustedRandIndex();
        RandIndex rand = new RandIndex();
        MEC<double[]> mec = new MEC<>(x, new EuclideanDistance(), 10, 8.0);
        double r = rand.measure(y, mec.getClusterLabel());
        double r2 = ari.measure(y, mec.getClusterLabel());
        System.out.format("Training rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2);
        assertTrue(r > 0.85);
        assertTrue(r2 > 0.35);
        int[] p = new int[testx.length];
        for (int i = 0; i < testx.length; i++) {
            p[i] = mec.predict(testx[i]);
        }
        r = rand.measure(testy, p);
        r2 = ari.measure(testy, p);
        System.out.format("Testing rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2);
        assertTrue(r > 0.85);
        assertTrue(r2 > 0.35);
    } catch (Exception ex) {
        System.err.println(ex);
    }
}
Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) AttributeDataset(smile.data.AttributeDataset) RandIndex(smile.validation.RandIndex) AdjustedRandIndex(smile.validation.AdjustedRandIndex) EuclideanDistance(smile.math.distance.EuclideanDistance) NominalAttribute(smile.data.NominalAttribute) AdjustedRandIndex(smile.validation.AdjustedRandIndex) Test(org.junit.Test)

Example 25 with NominalAttribute

use of smile.data.NominalAttribute in project smile by haifengl.

the class NumericAttributeFeatureTest method testNORMALIZATION.

/**
     * Test of f method, of class NumericAttributeFeature.
     */
@Test
public void testNORMALIZATION() {
    System.out.println("NORMALIZATION");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset data = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        double[][] x = data.toArray(new double[data.size()][]);
        double[] min = Math.colMin(x);
        double[] max = Math.colMax(x);
        NumericAttributeFeature naf = new NumericAttributeFeature(data.attributes(), NumericAttributeFeature.Scaling.NORMALIZATION, x);
        Attribute[] attributes = naf.attributes();
        assertEquals(256, attributes.length);
        for (int i = 0; i < x.length; i++) {
            double[] y = new double[attributes.length];
            for (int j = 0; j < y.length; j++) {
                y[j] = naf.f(x[i], j);
                assertEquals((x[i][j] - min[j]) / (max[j] - min[j]), y[j], 1E-7);
            }
        }
    } catch (Exception ex) {
        System.err.println(ex);
    }
}
Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) AttributeDataset(smile.data.AttributeDataset) NominalAttribute(smile.data.NominalAttribute) NominalAttribute(smile.data.NominalAttribute) Attribute(smile.data.Attribute) Test(org.junit.Test)

Aggregations

NominalAttribute (smile.data.NominalAttribute)54 DelimitedTextParser (smile.data.parser.DelimitedTextParser)49 AttributeDataset (smile.data.AttributeDataset)48 Test (org.junit.Test)46 AdjustedRandIndex (smile.validation.AdjustedRandIndex)14 RandIndex (smile.validation.RandIndex)14 Attribute (smile.data.Attribute)12 ArrayList (java.util.ArrayList)7 EuclideanDistance (smile.math.distance.EuclideanDistance)5 IOException (java.io.IOException)4 BufferedReader (java.io.BufferedReader)3 ParseException (java.text.ParseException)3 LDA (smile.classification.LDA)3 InputStreamReader (java.io.InputStreamReader)2 DateAttribute (smile.data.DateAttribute)2 NumericAttribute (smile.data.NumericAttribute)2 StringAttribute (smile.data.StringAttribute)2 PlotCanvas (smile.plot.PlotCanvas)2 Accuracy (smile.validation.Accuracy)2 BorderLayout (java.awt.BorderLayout)1