Examples with AttributeDataset - smile.data.AttributeDataset

Example 11 with AttributeDataset

use of smile.data.AttributeDataset in project smile by haifengl.

the class GrowingNeuralGasTest method testUSPS.

/**
 * Test of learn method, of class GrowingNeuralGas.
 * <p>
 * Parses the USPS train and test files, feeds the training samples to a
 * {@code GrowingNeuralGas} for 10 passes (each pass in the order returned by
 * {@code Math.permutate}), calls {@code partition(10)}, and then asserts that
 * the rand index exceeds 0.85 and the adjusted rand index exceeds 0.40 on
 * both the training and the test predictions.
 * <p>
 * Any exception raised while loading data or running the model fails the
 * test instead of being printed and ignored.
 */
@Test
public void testUSPS() {
    System.out.println("USPS");
    DelimitedTextParser parser = new DelimitedTextParser();
    // Column 0 of each row is registered as the nominal response "class".
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));
        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);
        double[][] testx = test.toArray(new double[test.size()][]);
        int[] testy = test.toArray(new int[test.size()]);
        GrowingNeuralGas gng = new GrowingNeuralGas(x[0].length);
        for (int i = 0; i < 10; i++) {
            // NOTE(review): java.lang.Math has no permutate(); this presumably
            // resolves to smile.math.Math imported by the enclosing test class.
            int[] index = Math.permutate(x.length);
            for (int j = 0; j < x.length; j++) {
                gng.update(x[index[j]]);
            }
        }
        gng.partition(10);
        AdjustedRandIndex ari = new AdjustedRandIndex();
        RandIndex rand = new RandIndex();
        // Score the predictions on the training samples.
        int[] p = new int[x.length];
        for (int i = 0; i < x.length; i++) {
            p[i] = gng.predict(x[i]);
        }
        double r = rand.measure(y, p);
        double r2 = ari.measure(y, p);
        System.out.format("Training rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2);
        assertTrue(r > 0.85);
        assertTrue(r2 > 0.40);
        // Score the predictions on the held-out test samples.
        p = new int[testx.length];
        for (int i = 0; i < testx.length; i++) {
            p[i] = gng.predict(testx[i]);
        }
        r = rand.measure(testy, p);
        r2 = ari.measure(testy, p);
        System.out.format("Testing rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2);
        assertTrue(r > 0.85);
        assertTrue(r2 > 0.40);
    } catch (Exception ex) {
        // A missing or unparsable data file must fail the test; merely
        // printing the exception would let the test pass without checking anything.
        throw new AssertionError("USPS GrowingNeuralGas test aborted by exception: " + ex, ex);
    }
}

Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) AttributeDataset(smile.data.AttributeDataset) NominalAttribute(smile.data.NominalAttribute) AdjustedRandIndex(smile.validation.AdjustedRandIndex) RandIndex(smile.validation.RandIndex) Test(org.junit.Test)

Example 12 with AttributeDataset

use of smile.data.AttributeDataset in project smile by haifengl.

the class NeuralMapTest method testUSPS.

/**
 * Test of learn method, of class NeuralMap.
 * <p>
 * Parses the USPS train and test files, feeds every training sample to a
 * {@code NeuralMap} for 5 passes in dataset order, calls {@code purge(16)}
 * and {@code partition(10)}, and prints the rand index and adjusted rand
 * index of the predictions on the training and test sets. The threshold
 * assertions are disabled (commented out), so the test only verifies that
 * the whole pipeline runs without throwing.
 * <p>
 * Any exception raised while loading data or running the model fails the
 * test instead of being printed and ignored.
 */
@Test
public void testUSPS() {
    System.out.println("USPS");
    DelimitedTextParser parser = new DelimitedTextParser();
    // Column 0 of each row is registered as the nominal response "class".
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));
        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);
        double[][] testx = test.toArray(new double[test.size()][]);
        int[] testy = test.toArray(new int[test.size()]);
        NeuralMap cortex = new NeuralMap(x[0].length, 8.0, 0.05, 0.0006, 5, 3);
        for (int i = 0; i < 5; i++) {
            for (double[] xi : x) {
                cortex.update(xi);
            }
        }
        cortex.purge(16);
        cortex.partition(10);
        AdjustedRandIndex ari = new AdjustedRandIndex();
        RandIndex rand = new RandIndex();
        // Score the predictions on the training samples.
        int[] p = new int[x.length];
        for (int i = 0; i < x.length; i++) {
            p[i] = cortex.predict(x[i]);
        }
        double r = rand.measure(y, p);
        double r2 = ari.measure(y, p);
        System.out.format("Training rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2);
        // Thresholds are disabled in the original test; kept for reference.
        //assertTrue(r > 0.65);
        //assertTrue(r2 > 0.18);
        // Score the predictions on the held-out test samples.
        p = new int[testx.length];
        for (int i = 0; i < testx.length; i++) {
            p[i] = cortex.predict(testx[i]);
        }
        r = rand.measure(testy, p);
        r2 = ari.measure(testy, p);
        System.out.format("Testing rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2);
        //assertTrue(r > 0.65);
        //assertTrue(r2 > 0.18);
    } catch (Exception ex) {
        // With the threshold assertions disabled, this is the only failure
        // signal the test has; printing and returning would hide every error.
        throw new AssertionError("USPS NeuralMap test aborted by exception: " + ex, ex);
    }
}

Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) AttributeDataset(smile.data.AttributeDataset) NominalAttribute(smile.data.NominalAttribute) RandIndex(smile.validation.RandIndex) AdjustedRandIndex(smile.validation.AdjustedRandIndex) Test(org.junit.Test)

Example 13 with AttributeDataset

use of smile.data.AttributeDataset in project smile by haifengl.

the class SOMTest method testUSPS.

/**
 * Test of learn method, of class SOM.
 * <p>
 * Parses the USPS train and test files, constructs a {@code SOM} from the
 * training samples with arguments {@code (x, 10, 10)}, and takes the labels
 * returned by {@code partition(10)} as the training clustering. It asserts
 * that the rand index exceeds 0.88 and the adjusted rand index exceeds 0.45
 * for those labels, and again for {@code predict} applied to each test sample.
 * <p>
 * Any exception raised while loading data or running the model fails the
 * test instead of being printed and ignored.
 */
@Test
public void testUSPS() {
    System.out.println("USPS");
    DelimitedTextParser parser = new DelimitedTextParser();
    // Column 0 of each row is registered as the nominal response "class".
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));
        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);
        double[][] testx = test.toArray(new double[test.size()][]);
        int[] testy = test.toArray(new int[test.size()]);
        SOM som = new SOM(x, 10, 10);
        // partition() returns the labels that are scored against the training responses.
        int[] label = som.partition(10);
        AdjustedRandIndex ari = new AdjustedRandIndex();
        RandIndex rand = new RandIndex();
        double r = rand.measure(y, label);
        double r2 = ari.measure(y, label);
        System.out.format("Training rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2);
        assertTrue(r > 0.88);
        assertTrue(r2 > 0.45);
        // Score the predictions on the held-out test samples.
        int[] p = new int[testx.length];
        for (int i = 0; i < testx.length; i++) {
            p[i] = som.predict(testx[i]);
        }
        r = rand.measure(testy, p);
        r2 = ari.measure(testy, p);
        System.out.format("Testing rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2);
        assertTrue(r > 0.88);
        assertTrue(r2 > 0.45);
    } catch (Exception ex) {
        // A missing or unparsable data file must fail the test; merely
        // printing the exception would let the test pass without checking anything.
        throw new AssertionError("USPS SOM test aborted by exception: " + ex, ex);
    }
}

Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) AttributeDataset(smile.data.AttributeDataset) NominalAttribute(smile.data.NominalAttribute) RandIndex(smile.validation.RandIndex) AdjustedRandIndex(smile.validation.AdjustedRandIndex) Test(org.junit.Test)

Example 14 with AttributeDataset

use of smile.data.AttributeDataset in project smile by haifengl.

the class GCTParser method parse.

/**
 * Parse a GCT dataset from an input stream.
 * <p>
 * Layout as read by this method (all fields are tab-separated):
 * <ol>
 * <li>line 1 must be exactly {@code #1.2};</li>
 * <li>line 2 holds two integers: the number of data rows {@code n} and the
 *     number of value columns {@code p}, both of which must be positive;</li>
 * <li>line 3 is the header with {@code p + 2} fields; fields from the third
 *     onward become the names of the numeric attributes;</li>
 * <li>the next {@code n} lines each hold {@code p + 2} fields: the datum
 *     name, its description, and {@code p} values. An empty value is stored
 *     as {@code NaN}.</li>
 * </ol>
 * Lines after the {@code n}-th data row are not read. The reader wrapping
 * {@code stream} is closed when this method exits, whether it returns
 * normally or throws.
 *
 * @param name the name of dataset.
 * @param stream the input stream of data.
 * @return the dataset holding one datum per data row.
 * @throws java.io.IOException if reading fails, the version line is wrong,
 *         the input ends prematurely, or a line has an unexpected number of fields.
 * @throws ParseException declared by the signature; nothing in this method
 *         throws it explicitly.
 * @throws NumberFormatException if a size or a non-empty value is not a valid number.
 */
public AttributeDataset parse(String name, InputStream stream) throws IOException, ParseException {
    // try-with-resources: the reader is released on every exit path, not
    // only after a fully successful parse.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream))) {
        String line = reader.readLine();
        if (line == null) {
            throw new IOException("Empty data source.");
        }
        if (!line.equals("#1.2")) {
            throw new IOException("Invalid version.");
        }
        line = reader.readLine();
        if (line == null) {
            throw new IOException("Premature end of file.");
        }
        // Limit -1 keeps trailing empty fields so the field counts stay exact.
        String[] tokens = line.split("\t", -1);
        if (tokens.length != 2) {
            throw new IOException("Invalid data size inforamation.");
        }
        int n = Integer.parseInt(tokens[0]);
        int p = Integer.parseInt(tokens[1]);
        if (n <= 0 || p <= 0) {
            throw new IOException(String.format("Invalid data size %d x %d.", n, p));
        }
        Attribute[] attributes = new Attribute[p];
        line = reader.readLine();
        if (line == null) {
            throw new IOException("Premature end of file.");
        }
        tokens = line.split("\t", -1);
        if (tokens.length != p + 2) {
            throw new IOException("Invalid title header.");
        }
        // The first two header fields label the name/description columns.
        for (int i = 0; i < p; i++) {
            attributes[i] = new NumericAttribute(tokens[i + 2]);
        }
        AttributeDataset data = new AttributeDataset(name, attributes);
        for (int i = 0; i < n; i++) {
            line = reader.readLine();
            if (line == null) {
                throw new IOException("Premature end of file.");
            }
            tokens = line.split("\t", -1);
            if (tokens.length != p + 2) {
                // Three lines precede the data rows, hence the 1-based line number i + 4.
                throw new IOException(String.format("Invalid number of elements of line %d: %d", i + 4, tokens.length));
            }
            double[] x = new double[p];
            for (int j = 0; j < p; j++) {
                String value = tokens[j + 2];
                // An empty field marks a missing value.
                x[j] = value.isEmpty() ? Double.NaN : Double.parseDouble(value);
            }
            Datum<double[]> datum = new Datum<>(x);
            datum.name = tokens[0];
            datum.description = tokens[1];
            data.add(datum);
        }
        return data;
    }
}

Also used : AttributeDataset(smile.data.AttributeDataset) Datum(smile.data.Datum) InputStreamReader(java.io.InputStreamReader) Attribute(smile.data.Attribute) NumericAttribute(smile.data.NumericAttribute) IOException(java.io.IOException) BufferedReader(java.io.BufferedReader)

Example 15 with AttributeDataset

use of smile.data.AttributeDataset in project smile by haifengl.

the class PCLParser method parse.

/**
 * Parse a PCL dataset from an input stream.
 * <p>
 * Layout as read by this method (all fields are tab-separated):
 * <ol>
 * <li>line 1 is the header; fields from the fourth onward become the names
 *     of the numeric attributes;</li>
 * <li>line 2 must have as many fields as the header; fields from the fourth
 *     onward are parsed as the per-attribute weights;</li>
 * <li>every remaining line is a data row with the same number of fields:
 *     the datum name, its description, its weight, and then the values. An
 *     empty value is stored as {@code NaN}.</li>
 * </ol>
 * The reader wrapping {@code stream} is closed when this method exits,
 * whether it returns normally or throws.
 *
 * @param name the name of dataset.
 * @param stream the input stream of data.
 * @return the dataset holding one datum per data row.
 * @throws java.io.IOException if reading fails, the input ends prematurely,
 *         the header has fewer than three fields, or a line has an unexpected
 *         number of fields.
 * @throws ParseException declared by the signature; nothing in this method
 *         throws it explicitly.
 * @throws NumberFormatException if a weight or a non-empty value is not a valid number.
 */
public AttributeDataset parse(String name, InputStream stream) throws IOException, ParseException {
    // try-with-resources: the reader is released on every exit path, not
    // only after a fully successful parse.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream))) {
        String line = reader.readLine();
        if (line == null) {
            throw new IOException("Empty data source.");
        }
        // Limit -1 keeps trailing empty fields so the field counts stay exact.
        String[] tokens = line.split("\t", -1);
        if (tokens.length < 3) {
            // Without this guard a short header yields a negative attribute
            // count and an opaque NegativeArraySizeException below.
            throw new IOException(String.format("Invalid header: expected at least 3 columns but found %d.", tokens.length));
        }
        int p = tokens.length - 3;
        line = reader.readLine();
        if (line == null) {
            throw new IOException("Premature end of file.");
        }
        String[] weight = line.split("\t", -1);
        if (weight.length != tokens.length) {
            throw new IOException("Invalid sample weight header.");
        }
        Attribute[] attributes = new Attribute[p];
        for (int i = 0; i < p; i++) {
            attributes[i] = new NumericAttribute(tokens[i + 3], null, Double.parseDouble(weight[i + 3]));
        }
        AttributeDataset data = new AttributeDataset(name, attributes);
        // i is the 1-based line number used in error messages; data rows start at line 3.
        for (int i = 3; (line = reader.readLine()) != null; i++) {
            tokens = line.split("\t", -1);
            if (tokens.length != weight.length) {
                throw new IOException(String.format("Invalid number of elements of line %d: %d", i, tokens.length));
            }
            double[] x = new double[p];
            for (int j = 0; j < p; j++) {
                String value = tokens[j + 3];
                // An empty field marks a missing value.
                x[j] = value.isEmpty() ? Double.NaN : Double.parseDouble(value);
            }
            Datum<double[]> datum = new Datum<>(x);
            datum.name = tokens[0];
            datum.description = tokens[1];
            datum.weight = Double.parseDouble(tokens[2]);
            data.add(datum);
        }
        return data;
    }
}

Aggregations

AttributeDataset (smile.data.AttributeDataset)140 Test (org.junit.Test)125 ArffParser (smile.data.parser.ArffParser)75 NominalAttribute (smile.data.NominalAttribute)50 DelimitedTextParser (smile.data.parser.DelimitedTextParser)48 Attribute (smile.data.Attribute)29 EuclideanDistance (smile.math.distance.EuclideanDistance)19 LOOCV (smile.validation.LOOCV)18 CrossValidation (smile.validation.CrossValidation)17 AdjustedRandIndex (smile.validation.AdjustedRandIndex)14 RandIndex (smile.validation.RandIndex)14 ClassifierTrainer (smile.classification.ClassifierTrainer)13 GaussianKernel (smile.math.kernel.GaussianKernel)11 IOException (java.io.IOException)10 RadialBasisFunction (smile.math.rbf.RadialBasisFunction)9 RBFNetwork (smile.regression.RBFNetwork)8 ArrayList (java.util.ArrayList)6 KMeans (smile.clustering.KMeans)6 Datum (smile.data.Datum)6 NumericAttribute (smile.data.NumericAttribute)6