Search in sources :

Example 1 with SparseDataset

use of smile.data.SparseDataset in project smile by haifengl.

the class LibsvmParserTest method testParseNG20.

/**
 * Test of parse method, of class LibsvmParser, on the NG20 train and test files.
 * <p>
 * Checks the number of rows, a sample of feature values in the first and last
 * training rows, and a sample of response values in both datasets.
 * Any exception raised while loading or parsing the files fails the test.
 */
@Test
public void testParseNG20() throws Exception {
    System.out.println("NG20");
    LibsvmParser parser = new LibsvmParser();

    // No try/catch here: a parse or I/O failure must fail the test rather than
    // being printed and silently reported as a pass.
    SparseDataset train = parser.parse("NG20 Train", smile.data.parser.IOUtils.getTestDataFile("libsvm/news20.dat"));
    SparseDataset test = parser.parse("NG20 Test", smile.data.parser.IOUtils.getTestDataFile("libsvm/news20.t.dat"));
    int[] y = train.toArray(new int[train.size()]);
    int[] testy = test.toArray(new int[test.size()]);

    // JUnit convention: expected value first, actual value second.
    assertEquals(15935, train.size());

    // First training row.
    assertEquals(0, y[0]);
    assertEquals(0.0, train.get(0, 0), 1E-7);
    assertEquals(0.0, train.get(0, 1), 1E-7);
    assertEquals(2.0, train.get(0, 196), 1E-7);
    assertEquals(3.0, train.get(0, 320), 1E-7);
    assertEquals(0.0, train.get(0, 20504), 1E-7);
    assertEquals(1.0, train.get(0, 20505), 1E-7);
    assertEquals(1.0, train.get(0, 20506), 1E-7);
    assertEquals(0.0, train.get(0, 20507), 1E-7);

    // Last training row.
    int last = y.length - 1;
    assertEquals(16, y[last]);
    assertEquals(1.0, train.get(last, 0), 1E-7);
    assertEquals(0.0, train.get(last, 1), 1E-7);
    assertEquals(1.0, train.get(last, 9), 1E-7);
    assertEquals(0.0, train.get(last, 10), 1E-7);
    assertEquals(0.0, train.get(last, 57796), 1E-7);
    assertEquals(1.0, train.get(last, 57797), 1E-7);
    assertEquals(0.0, train.get(last, 57798), 1E-7);

    // Test set: size and a sample of response values.
    assertEquals(3993, test.size());
    assertEquals(1, testy[0]);
    assertEquals(17, testy[testy.length - 3]);
    assertEquals(18, testy[testy.length - 2]);
    assertEquals(16, testy[testy.length - 1]);
}
Also used : SparseDataset(smile.data.SparseDataset) Test(org.junit.Test)

Example 2 with SparseDataset

use of smile.data.SparseDataset in project smile by haifengl.

the class LibsvmParserTest method testParseGlass.

/**
 * Test of parse method, of class LibsvmParser, on the glass dataset.
 * <p>
 * Checks the number of rows and columns, and the response and all nine
 * feature values of the first and the last (index 213) rows.
 * Any exception raised while loading or parsing the file fails the test.
 */
@Test
public void testParseGlass() throws Exception {
    System.out.println("glass");
    LibsvmParser parser = new LibsvmParser();

    // No try/catch here: a parse or I/O failure must fail the test rather than
    // being printed and silently reported as a pass.
    SparseDataset train = parser.parse("Glass", smile.data.parser.IOUtils.getTestDataFile("libsvm/glass.txt"));
    double[][] x = train.toArray();
    int[] y = train.toArray(new int[train.size()]);

    assertEquals(214, train.size());
    assertEquals(9, x[0].length);

    // Expected dense feature values of the two sampled rows.
    double[] expectedFirst = {
        -0.134323, -0.124812, 1, -0.495327, -0.296429, -0.980676, -0.3829, -1, -1
    };
    double[] expectedLast = {
        -0.476734, 0.0526316, -1, 0.115265, 0.267857, -1, -0.407063, 0.0603174, -1
    };

    assertEquals(0, y[0]);
    for (int j = 0; j < expectedFirst.length; j++) {
        assertEquals("x[0][" + j + "]", expectedFirst[j], x[0][j], 1E-7);
    }

    assertEquals(5, y[213]);
    for (int j = 0; j < expectedLast.length; j++) {
        assertEquals("x[213][" + j + "]", expectedLast[j], x[213][j], 1E-7);
    }
}
Also used : SparseDataset(smile.data.SparseDataset) Test(org.junit.Test)

Example 3 with SparseDataset

use of smile.data.SparseDataset in project smile by haifengl.

the class LibsvmParser method parse.

/**
 * Parse a libsvm sparse dataset from an input stream.
 * <p>
 * Every non-blank line is split on whitespace: the first token is the
 * response value and each following token must have the form
 * {@code index:value}. The column stored for a feature is the input index
 * minus one. Blank lines (for example a trailing newline at the end of the
 * file) are skipped and do not consume a row index.
 * <p>
 * The first non-blank line decides how responses are read: if its first
 * token parses as an integer, every response is read as an integer,
 * otherwise every response is read as a double. Integer responses are
 * finally replaced by their position in the sorted array of distinct
 * response values found in the data.
 * <p>
 * The reader wrapping {@code stream} is closed before this method returns,
 * whether it succeeds or fails.
 *
 * @param name the name of dataset.
 * @param stream the input stream of data.
 * @return the parsed sparse dataset.
 * @throws java.io.IOException if the stream contains no non-blank line, or
 *         if reading from it fails.
 * @throws ParseException declared in the signature; presumably raised by
 *         {@code Attribute.valueOf} when registering class labels (to be
 *         confirmed against that class).
 * @throws NumberFormatException if a response value, a feature index or a
 *         feature value is not a valid number, or a feature token is not of
 *         the form {@code index:value}.
 */
public SparseDataset parse(String name, InputStream stream) throws IOException, ParseException {
    BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
    try {
        String line = reader.readLine();
        // Skip leading blank lines so that the response kind is detected
        // from the first line that actually carries data.
        while (line != null && line.trim().isEmpty()) {
            line = reader.readLine();
        }
        if (line == null) {
            throw new IOException("Empty data source.");
        }

        String[] tokens = line.trim().split("\\s+");
        boolean classification = true;
        Attribute response = null;
        try {
            Integer.valueOf(tokens[0]);
            response = new NominalAttribute("class");
        } catch (NumberFormatException e) {
            try {
                Double.valueOf(tokens[0]);
                // NOTE(review): a real-valued response is still described by a
                // NominalAttribute here; a numeric attribute type looks more
                // appropriate. Left unchanged, confirm before altering.
                response = new NominalAttribute("response");
                classification = false;
            } catch (NumberFormatException ex) {
                logger.error("Failed to parse {}", tokens[0], ex);
                NumberFormatException failure =
                        new NumberFormatException("Unrecognized response variable value: " + tokens[0]);
                // NumberFormatException has no cause constructor; attach it explicitly.
                failure.initCause(ex);
                throw failure;
            }
        }

        SparseDataset sparse = new SparseDataset(name, response);
        // Index of the next data row; advanced only for non-blank lines.
        int row = 0;
        while (line != null) {
            String text = line.trim();
            if (!text.isEmpty()) {
                tokens = text.split("\\s+");
                if (classification) {
                    int y = Integer.parseInt(tokens[0]);
                    sparse.set(row, y);
                } else {
                    double y = Double.parseDouble(tokens[0]);
                    sparse.set(row, y);
                }
                for (int k = 1; k < tokens.length; k++) {
                    String[] pair = tokens[k].split(":");
                    if (pair.length != 2) {
                        throw new NumberFormatException("Invalid data: " + tokens[k]);
                    }
                    int j = Integer.parseInt(pair[0]) - 1;
                    double x = Double.parseDouble(pair[1]);
                    sparse.set(row, j, x);
                }
                row++;
            }
            line = reader.readLine();
        }

        if (classification) {
            int n = sparse.size();
            int[] y = sparse.toArray(new int[n]);
            int[] label = Math.unique(y);
            Arrays.sort(label);
            // Register every distinct label, in sorted order, with the response attribute.
            for (int c : label) {
                response.valueOf(String.valueOf(c));
            }
            // Replace each raw label by its index in the sorted distinct labels.
            for (int i = 0; i < n; i++) {
                sparse.get(i).y = Arrays.binarySearch(label, y[i]);
            }
        }
        return sparse;
    } finally {
        reader.close();
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) Attribute(smile.data.Attribute) NominalAttribute(smile.data.NominalAttribute) IOException(java.io.IOException) NominalAttribute(smile.data.NominalAttribute) BufferedReader(java.io.BufferedReader) SparseDataset(smile.data.SparseDataset)

Example 4 with SparseDataset

use of smile.data.SparseDataset in project smile by haifengl.

the class SIBTest method testParseNG20.

/**
 * Test of SIB clustering and predict method, of class SIB, on the NG20 data.
 * <p>
 * Builds a SIB model from the training file, compares its cluster labels
 * with the training responses, then compares the predictions for every test
 * row with the test responses. Both comparisons require a rand index above
 * 0.85 and an adjusted rand index above 0.2.
 * Any exception raised while loading data or clustering fails the test.
 */
@Test
public void testParseNG20() throws Exception {
    System.out.println("NG20");
    LibsvmParser parser = new LibsvmParser();

    // No try/catch here: a parse, I/O or clustering failure must fail the test
    // rather than being printed and silently reported as a pass.
    SparseDataset train = parser.parse("NG20 Train", smile.data.parser.IOUtils.getTestDataFile("libsvm/news20.dat"));
    SparseDataset test = parser.parse("NG20 Test", smile.data.parser.IOUtils.getTestDataFile("libsvm/news20.t.dat"));
    int[] y = train.toArray(new int[train.size()]);
    int[] testy = test.toArray(new int[test.size()]);

    SIB sib = new SIB(train, 20, 100, 8);
    System.out.println(sib);

    AdjustedRandIndex ari = new AdjustedRandIndex();
    RandIndex rand = new RandIndex();

    // Agreement between training responses and the cluster labels.
    int[] clusters = sib.getClusterLabel();
    double r = rand.measure(y, clusters);
    double r2 = ari.measure(y, clusters);
    System.out.format("Training rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2);
    assertTrue(r > 0.85);
    assertTrue(r2 > 0.2);

    // Agreement between test responses and the predicted clusters.
    int[] p = new int[test.size()];
    for (int i = 0; i < p.length; i++) {
        p[i] = sib.predict(test.get(i).x);
    }
    r = rand.measure(testy, p);
    r2 = ari.measure(testy, p);
    System.out.format("Testing rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2);
    assertTrue(r > 0.85);
    assertTrue(r2 > 0.2);
}
Also used : LibsvmParser(smile.data.parser.LibsvmParser) SparseDataset(smile.data.SparseDataset) AdjustedRandIndex(smile.validation.AdjustedRandIndex) RandIndex(smile.validation.RandIndex) AdjustedRandIndex(smile.validation.AdjustedRandIndex) Test(org.junit.Test)

Example 5 with SparseDataset

use of smile.data.SparseDataset in project smile by haifengl.

the class SparseDatasetParserTest method testParse.

/**
 * Test of parse method, of class SparseDatasetParser, on the kos text file.
 * <p>
 * Checks the number of rows, the number of columns, the value reported by
 * {@code length()}, and a sample of individual cell values.
 * Any exception raised while loading or parsing the file fails the test.
 */
@Test
public void testParse() throws Exception {
    System.out.println("parse");

    // No try/catch here: a parse or I/O failure must fail the test rather than
    // being printed and silently reported as a pass.
    SparseDatasetParser parser = new SparseDatasetParser(1);
    SparseDataset data = parser.parse(smile.data.parser.IOUtils.getTestDataFile("text/kos.txt"));

    // Overall dimensions.
    assertEquals(3430, data.size());
    assertEquals(6906, data.ncols());
    assertEquals(353160, data.length());

    // Sampled cells.
    assertEquals(2.0, data.get(0, 60), 1E-7);
    assertEquals(1.0, data.get(1, 1062), 1E-7);
    assertEquals(0.0, data.get(1, 1063), 1E-7);
    assertEquals(1.0, data.get(3429, 6821), 1E-7);
}
Also used : SparseDataset(smile.data.SparseDataset) Test(org.junit.Test)

Aggregations

SparseDataset (smile.data.SparseDataset)6 Test (org.junit.Test)4 BufferedReader (java.io.BufferedReader)2 IOException (java.io.IOException)2 InputStreamReader (java.io.InputStreamReader)2 ParseException (java.text.ParseException)1 Attribute (smile.data.Attribute)1 NominalAttribute (smile.data.NominalAttribute)1 LibsvmParser (smile.data.parser.LibsvmParser)1 AdjustedRandIndex (smile.validation.AdjustedRandIndex)1 RandIndex (smile.validation.RandIndex)1