Search in sources :

Example 6 with NominalAttribute

use of smile.data.NominalAttribute in project smile by haifengl.

the class LibsvmParser method parse.

/**
 * Parse a libsvm sparse dataset from an input stream.
 * <p>
 * Each non-blank line is split on whitespace: the first token is the response
 * value and every following token must have the form {@code index:value}.
 * The index is 1-based in the file and is stored 0-based. The first non-blank
 * line decides how responses are read: if its first token parses as an
 * integer, every response is read as an integer class label; otherwise every
 * response is read as a double. Blank lines (for example a trailing newline
 * at the end of the file) are skipped and do not consume a row index.
 * <p>
 * For integer labels, the labels are afterwards replaced by their position
 * in the sorted array returned by {@code Math.unique}.
 * <p>
 * The reader wrapping {@code stream} is closed before this method returns or
 * throws.
 *
 * @param name the name of dataset.
 * @param stream the input stream of data.
 * @return the parsed sparse dataset.
 * @throws java.io.IOException if reading fails or the source has no data line.
 * @throws ParseException declared by the signature; no statement in this
 *         method body throws it directly.
 * @throws NumberFormatException if a response value or an
 *         {@code index:value} pair is malformed.
 */
public SparseDataset parse(String name, InputStream stream) throws IOException, ParseException {
    BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
    try {
        // Skip leading blank lines so that the task type is detected from real data.
        String line = reader.readLine();
        while (line != null && line.trim().isEmpty()) {
            line = reader.readLine();
        }
        if (line == null) {
            throw new IOException("Empty data source.");
        }
        String[] tokens = line.trim().split("\\s+");
        boolean classification = true;
        Attribute response = null;
        try {
            Integer.valueOf(tokens[0]);
            response = new NominalAttribute("class");
        } catch (NumberFormatException e) {
            try {
                Double.valueOf(tokens[0]);
                // NOTE(review): a NominalAttribute is also used for the real-valued
                // response; a numeric attribute type looks more appropriate here.
                // Confirm against smile.data before changing.
                response = new NominalAttribute("response");
                classification = false;
            } catch (NumberFormatException ex) {
                logger.error("Failed to parse {}", tokens[0], ex);
                NumberFormatException failure = new NumberFormatException("Unrecognized response variable value: " + tokens[0]);
                failure.initCause(ex);
                throw failure;
            }
        }
        SparseDataset sparse = new SparseDataset(name, response);
        // i is the row index; it only advances for lines that actually hold data.
        int i = 0;
        while (line != null) {
            String record = line.trim();
            if (!record.isEmpty()) {
                tokens = record.split("\\s+");
                if (classification) {
                    int y = Integer.parseInt(tokens[0]);
                    sparse.set(i, y);
                } else {
                    double y = Double.parseDouble(tokens[0]);
                    sparse.set(i, y);
                }
                for (int k = 1; k < tokens.length; k++) {
                    String[] pair = tokens[k].split(":");
                    if (pair.length != 2) {
                        throw new NumberFormatException("Invalid data: " + tokens[k]);
                    }
                    // Feature indices are 1-based in the file, 0-based in the dataset.
                    int j = Integer.parseInt(pair[0]) - 1;
                    double x = Double.parseDouble(pair[1]);
                    sparse.set(i, j, x);
                }
                i++;
            }
            line = reader.readLine();
        }
        if (classification) {
            int n = sparse.size();
            int[] y = sparse.toArray(new int[n]);
            int[] label = Math.unique(y);
            Arrays.sort(label);
            // Pass each label, in sorted order, to the response attribute.
            for (int c : label) {
                response.valueOf(String.valueOf(c));
            }
            // Replace each raw label by its position in the sorted label array.
            for (int r = 0; r < n; r++) {
                sparse.get(r).y = Arrays.binarySearch(label, y[r]);
            }
        }
        return sparse;
    } finally {
        reader.close();
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) Attribute(smile.data.Attribute) NominalAttribute(smile.data.NominalAttribute) IOException(java.io.IOException) BufferedReader(java.io.BufferedReader) SparseDataset(smile.data.SparseDataset)

Example 7 with NominalAttribute

use of smile.data.NominalAttribute in project smile by haifengl.

the class LDADemo method actionPerformed.

/**
 * Handles the "startButton" action command.
 * <p>
 * Reads the selected index from {@code datasetBox}, parses the matching
 * dataset the first time it is selected (later selections reuse the entry
 * stored in {@code dataset}), picks the point legend from the dataset size
 * and starts a new thread that runs this object.
 * <p>
 * If the dataset cannot be loaded, an error dialog is shown and the method
 * returns without starting the thread.
 *
 * @param e the action event; only the command "startButton" is handled.
 */
@Override
public void actionPerformed(ActionEvent e) {
    if ("startButton".equals(e.getActionCommand())) {
        datasetIndex = datasetBox.getSelectedIndex();
        if (dataset[datasetIndex] == null) {
            DelimitedTextParser parser = new DelimitedTextParser();
            parser.setDelimiter("[\t]+");
            if (datasetIndex == 0) {
                parser.setColumnNames(true);
                parser.setResponseIndex(new NominalAttribute("class"), 4);
            }
            if (datasetIndex == 1) {
                parser.setResponseIndex(new NominalAttribute("class"), 16);
            }
            try {
                dataset[datasetIndex] = parser.parse(datasetName[datasetIndex], smile.data.parser.IOUtils.getTestDataFile(datasource[datasetIndex]));
            } catch (Exception ex) {
                JOptionPane.showMessageDialog(null, "Failed to load dataset.", "ERROR", JOptionPane.ERROR_MESSAGE);
                System.err.println(ex);
                // The slot is still null; continuing would dereference it below.
                return;
            }
        }
        if (dataset[datasetIndex].size() < 500) {
            pointLegend = 'o';
        } else {
            pointLegend = '.';
        }
        Thread thread = new Thread(this);
        thread.start();
    }
}
Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) NominalAttribute(smile.data.NominalAttribute)

Example 8 with NominalAttribute

use of smile.data.NominalAttribute in project smile by haifengl.

the class ProjectionDemo method actionPerformed.

/**
 * Handles the "startButton" action command.
 * <p>
 * Reads the selected index from {@code datasetBox}, parses the matching
 * dataset the first time it is selected (later selections reuse the entry
 * stored in {@code dataset}), picks the point legend from the dataset size
 * and starts a new thread that runs this object.
 * <p>
 * Parser configuration depends on the selected index: column names for
 * indices 0, 1, 2 and 4; row names for index 1; a nominal "class" response
 * at column 4 for index 0 and for indices 5 and above, and at column 16 for
 * index 3.
 * <p>
 * If the dataset cannot be loaded, an error dialog is shown and the method
 * returns without starting the thread.
 *
 * @param e the action event; only the command "startButton" is handled.
 */
@Override
public void actionPerformed(ActionEvent e) {
    if ("startButton".equals(e.getActionCommand())) {
        datasetIndex = datasetBox.getSelectedIndex();
        if (dataset[datasetIndex] == null) {
            DelimitedTextParser parser = new DelimitedTextParser();
            parser.setDelimiter("[\t]+");
            if (datasetIndex < 5 && datasetIndex != 3) {
                parser.setColumnNames(true);
            }
            if (datasetIndex == 1) {
                parser.setRowNames(true);
            }
            if (datasetIndex == 0) {
                parser.setResponseIndex(new NominalAttribute("class"), 4);
            }
            if (datasetIndex == 3) {
                parser.setResponseIndex(new NominalAttribute("class"), 16);
            }
            if (datasetIndex >= 5) {
                parser.setResponseIndex(new NominalAttribute("class"), 4);
            }
            try {
                dataset[datasetIndex] = parser.parse(datasetName[datasetIndex], smile.data.parser.IOUtils.getTestDataFile(datasource[datasetIndex]));
            } catch (Exception ex) {
                JOptionPane.showMessageDialog(null, "Failed to load dataset.", "ERROR", JOptionPane.ERROR_MESSAGE);
                System.err.println(ex);
                // The slot is still null; continuing would dereference it below.
                return;
            }
        }
        if (dataset[datasetIndex].size() < 500) {
            pointLegend = 'o';
        } else {
            pointLegend = '.';
        }
        Thread thread = new Thread(this);
        thread.start();
    }
}
Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) NominalAttribute(smile.data.NominalAttribute)

Example 9 with NominalAttribute

use of smile.data.NominalAttribute in project smile by haifengl.

the class HexmapDemo method main.

/**
 * Demo entry point.
 * <p>
 * Loads the USPS training file with the response in column 0, builds a
 * 20 x 20 SOM on it, and creates an HTML label per map cell that lists how
 * many samples fall into the cell and the percentage per class. It then
 * computes pairwise distances between all cells of {@code som.map()},
 * feeds them to MDS, Sammon mapping and isotonic MDS, and adds the hexmap
 * and the three surface plots to a frame.
 * <p>
 * Any exception is printed to standard error.
 *
 * @param args command line arguments; not used.
 */
public static void main(String[] args) {
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);

        int m = 20;
        int n = 20;
        int cells = m * n;
        SOM som = new SOM(x, m, n);
        String[][] labels = new String[m][n];

        // Neuron index predicted for every training sample.
        int[] neurons = new int[x.length];
        for (int s = 0; s < x.length; s++) {
            neurons[s] = som.predict(x[s]);
        }

        // Per-cell class histogram, rendered as an HTML table.
        int[] count = new int[10];
        for (int cell = 0; cell < cells; cell++) {
            int row = cell / n;
            int col = cell % n;
            Arrays.fill(count, 0);
            for (int s = 0; s < neurons.length; s++) {
                if (neurons[s] == cell) {
                    count[y[s]]++;
                }
            }
            int total = Math.sum(count);
            if (total == 0) {
                labels[row][col] = "no samples";
                continue;
            }
            StringBuilder html = new StringBuilder(String.format("<table border=\"1\"><tr><td>Total</td><td align=\"right\">%d</td></tr>", total));
            for (int c = 0; c < count.length; c++) {
                if (count[c] > 0) {
                    html.append(String.format("<tr><td>class %d</td><td align=\"right\">%.1f%%</td></tr>", c, 100.0 * count[c] / total));
                }
            }
            html.append("</table>");
            labels[row][col] = html.toString();
        }

        double[][] umatrix = som.umatrix();
        double[][][] map = som.map();

        // Distance between every pair of map cells, addressed by flat cell index.
        double[][] proximity = new double[cells][cells];
        for (int a = 0; a < cells; a++) {
            for (int b = 0; b < cells; b++) {
                proximity[a][b] = Math.distance(map[a / n][a % n], map[b / n][b % n]);
            }
        }

        MDS mds = new MDS(proximity, 3);
        double[][] coords = mds.getCoordinates();
        double[][][] mdsgrid = new double[m][n][];
        for (int cell = 0; cell < cells; cell++) {
            mdsgrid[cell / n][cell % n] = mds.getCoordinates()[cell];
        }

        SammonMapping sammon = new SammonMapping(proximity, coords);
        double[][][] sammongrid = new double[m][n][];
        for (int cell = 0; cell < cells; cell++) {
            sammongrid[cell / n][cell % n] = sammon.getCoordinates()[cell];
        }

        IsotonicMDS isomds = new IsotonicMDS(proximity, coords);
        double[][][] isomdsgrid = new double[m][n][];
        for (int cell = 0; cell < cells; cell++) {
            isomdsgrid[cell / n][cell % n] = isomds.getCoordinates()[cell];
        }

        JFrame frame = new JFrame("Hexmap");
        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        frame.setLocationRelativeTo(null);
        frame.add(Hexmap.plot(labels, umatrix));

        PlotCanvas mdsCanvas = Surface.plot(mdsgrid);
        mdsCanvas.setTitle("MDS");
        frame.add(mdsCanvas);

        PlotCanvas isomdsCanvas = Surface.plot(isomdsgrid);
        isomdsCanvas.setTitle("Isotonic MDS");
        frame.add(isomdsCanvas);

        PlotCanvas sammonCanvas = Surface.plot(sammongrid);
        sammonCanvas.setTitle("Sammon Mapping");
        frame.add(sammonCanvas);

        frame.setVisible(true);
    } catch (Exception ex) {
        System.err.println(ex);
    }
}
Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) AttributeDataset(smile.data.AttributeDataset) SammonMapping(smile.mds.SammonMapping) SOM(smile.vq.SOM) NominalAttribute(smile.data.NominalAttribute) IsotonicMDS(smile.mds.IsotonicMDS) JFrame(javax.swing.JFrame) MDS(smile.mds.MDS) PlotCanvas(smile.plot.PlotCanvas)

Example 10 with NominalAttribute

use of smile.data.NominalAttribute in project smile by haifengl.

the class AdaBoostTest method testUSPS.

/**
 * Test of learn method, of class AdaBoost, on the USPS data.
 * <p>
 * Loads the USPS train and test files with the response in column 0 and
 * collapses the labels to two classes (0 stays 0, every other label becomes
 * 1). It trains {@code new AdaBoost(x, y, 100, 6)}, counts
 * misclassifications on the test set and requires at most 25 errors.
 * <p>
 * Any exception raised while loading data or training now fails the test
 * instead of being printed and ignored.
 */
@Test
public void testUSPS() {
    System.out.println("USPS");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));
        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);
        double[][] testx = test.toArray(new double[test.size()][]);
        int[] testy = test.toArray(new int[test.size()]);
        // Collapse the labels to a binary problem: label 0 versus everything else.
        for (int i = 0; i < y.length; i++) {
            if (y[i] != 0) {
                y[i] = 1;
            }
        }
        for (int i = 0; i < testy.length; i++) {
            if (testy[i] != 0) {
                testy[i] = 1;
            }
        }
        // NOTE(review): presumably 100 trees with at most 6 leaf nodes each;
        // confirm against the AdaBoost constructor.
        AdaBoost forest = new AdaBoost(x, y, 100, 6);
        int error = 0;
        for (int i = 0; i < testx.length; i++) {
            if (forest.predict(testx[i]) != testy[i]) {
                error++;
            }
        }
        System.out.println("AdaBoost error = " + error);
        System.out.format("USPS error rate = %.2f%%%n", 100.0 * error / testx.length);
        assertTrue(error <= 25);
    } catch (Exception ex) {
        System.err.println(ex);
        // Swallowing the exception would let the test pass without running.
        throw new AssertionError(ex);
    }
}
Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) AttributeDataset(smile.data.AttributeDataset) NominalAttribute(smile.data.NominalAttribute) Test(org.junit.Test)

Aggregations

NominalAttribute (smile.data.NominalAttribute)54 DelimitedTextParser (smile.data.parser.DelimitedTextParser)49 AttributeDataset (smile.data.AttributeDataset)48 Test (org.junit.Test)46 AdjustedRandIndex (smile.validation.AdjustedRandIndex)14 RandIndex (smile.validation.RandIndex)14 Attribute (smile.data.Attribute)12 ArrayList (java.util.ArrayList)7 EuclideanDistance (smile.math.distance.EuclideanDistance)5 IOException (java.io.IOException)4 BufferedReader (java.io.BufferedReader)3 ParseException (java.text.ParseException)3 LDA (smile.classification.LDA)3 InputStreamReader (java.io.InputStreamReader)2 DateAttribute (smile.data.DateAttribute)2 NumericAttribute (smile.data.NumericAttribute)2 StringAttribute (smile.data.StringAttribute)2 PlotCanvas (smile.plot.PlotCanvas)2 Accuracy (smile.validation.Accuracy)2 BorderLayout (java.awt.BorderLayout)1