Search in sources :

Example 6 with Attribute

use of smile.data.Attribute in project smile by haifengl.

the class ArffParserTest method testParseIris.

/**
 * Test of parse method, of class ArffParser, using the Weka iris data file.
 *
 * <p>Parses {@code weka/iris.arff} with attribute 4 as the response, then checks
 * the attribute types, the dataset dimensions, and the first and last instances.
 *
 * @throws Exception if the data file cannot be read or parsed. The exception is
 *         propagated on purpose so that the test is reported as failed instead
 *         of passing without having executed any assertion.
 */
@Test
public void testParseIris() throws Exception {
    System.out.println("iris");
    ArffParser arffParser = new ArffParser();
    arffParser.setResponseIndex(4);
    AttributeDataset iris = arffParser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/iris.arff"));
    double[][] x = iris.toArray(new double[iris.size()][]);
    int[] y = iris.toArray(new int[iris.size()]);

    // Schema: nominal response, numeric predictors.
    assertEquals(Attribute.Type.NOMINAL, iris.response().getType());
    for (Attribute attribute : iris.attributes()) {
        assertEquals(Attribute.Type.NUMERIC, attribute.getType());
    }
    assertEquals(150, iris.size());
    assertEquals(4, iris.attributes().length);

    // First instances.
    assertEquals("Iris-setosa", iris.response().toString(y[0]));
    assertEquals("Iris-setosa", iris.response().toString(y[1]));
    assertEquals("Iris-setosa", iris.response().toString(y[2]));
    assertEquals(5.1, x[0][0], 1E-7);
    assertEquals(3.5, x[0][1], 1E-7);
    assertEquals(1.4, x[0][2], 1E-7);
    assertEquals(0.2, x[0][3], 1E-7);

    // Last instance.
    assertEquals("Iris-virginica", iris.response().toString(y[149]));
    assertEquals(5.9, x[149][0], 1E-7);
    assertEquals(3.0, x[149][1], 1E-7);
    assertEquals(5.1, x[149][2], 1E-7);
    assertEquals(1.8, x[149][3], 1E-7);
}
Also used : AttributeDataset(smile.data.AttributeDataset) Attribute(smile.data.Attribute) Test(org.junit.Test)

Example 7 with Attribute

use of smile.data.Attribute in project smile by haifengl.

the class ArffParser method getAttributes.

/**
 * Returns the attribute set of given stream.
 *
 * <p>Only the header is read: a fresh {@code ArffParser} initialises a tokenizer
 * over the stream and collects the attributes through {@code readHeader}. The
 * stream is not closed by this method.
 *
 * @param stream the input to read the header from.
 * @return the attributes collected while reading the header.
 * @throws IOException declared by the signature; presumably raised when reading
 *         the stream fails.
 * @throws ParseException declared by the signature; presumably raised on a
 *         malformed header.
 */
public static Attribute[] getAttributes(InputStream stream) throws IOException, ParseException {
    StreamTokenizer headerTokens =
            new StreamTokenizer(new BufferedReader(new InputStreamReader(stream)));

    ArffParser headerParser = new ArffParser();
    headerParser.initTokenizer(headerTokens);

    List<Attribute> declared = new ArrayList<>();
    headerParser.readHeader(headerTokens, declared);

    Attribute[] result = new Attribute[declared.size()];
    return declared.toArray(result);
}
Also used : InputStreamReader(java.io.InputStreamReader) Attribute(smile.data.Attribute) NominalAttribute(smile.data.NominalAttribute) NumericAttribute(smile.data.NumericAttribute) DateAttribute(smile.data.DateAttribute) StringAttribute(smile.data.StringAttribute) BufferedReader(java.io.BufferedReader) ArrayList(java.util.ArrayList) Reader(java.io.Reader) StreamTokenizer(java.io.StreamTokenizer)

Example 8 with Attribute

use of smile.data.Attribute in project smile by haifengl.

the class ArffParser method parse.

/**
 * Parse a dataset from given stream.
 *
 * <p>The header is read first; the attribute at {@code responseIndex} (if that
 * index is within range) is split off as the response and the remaining
 * attributes become the dataset's attributes. Every following instance, dense
 * or sparse (introduced by <code>{</code>), is added to the dataset. The reader
 * wrapping {@code stream} is closed when the method exits.
 *
 * @param stream the input to parse.
 * @return the parsed dataset.
 * @throws IOException if the header declares no attributes; also declared for
 *         failures while reading the stream.
 * @throws ParseException declared by the signature; presumably raised on
 *         malformed content.
 */
public AttributeDataset parse(InputStream stream) throws IOException, ParseException {
    try (Reader input = new BufferedReader(new InputStreamReader(stream))) {
        StreamTokenizer tokens = new StreamTokenizer(input);
        initTokenizer(tokens);

        List<Attribute> predictors = new ArrayList<>();
        String relation = readHeader(tokens, predictors);
        if (predictors.isEmpty()) {
            throw new IOException("no header information available");
        }

        // Snapshot of every declared attribute, taken before the response is
        // removed from the list; instance parsing works against this full set.
        Attribute[] declared = predictors.toArray(new Attribute[predictors.size()]);

        // Split the response attribute off the predictor list, if one is selected.
        Attribute response = null;
        for (int index = 0; index < predictors.size(); index++) {
            if (index == responseIndex) {
                response = predictors.remove(index);
                break;
            }
        }

        Attribute[] remaining = predictors.toArray(new Attribute[predictors.size()]);
        AttributeDataset data = new AttributeDataset(relation, remaining, response);

        // Read instances until the tokenizer reports end of file.
        for (getFirstToken(tokens); tokens.ttype != StreamTokenizer.TT_EOF; getFirstToken(tokens)) {
            if (tokens.ttype != '{') {
                data.add(getInstance(tokens, declared));
            } else {
                data.add(getSparseInstance(tokens, declared));
            }
        }

        // Mark nominal and string attributes as no longer open. Only the
        // attributes left in the list are visited, so a removed response
        // attribute is not touched here.
        for (Attribute candidate : predictors) {
            if (candidate instanceof NominalAttribute) {
                ((NominalAttribute) candidate).setOpen(false);
            }
            if (candidate instanceof StringAttribute) {
                ((StringAttribute) candidate).setOpen(false);
            }
        }

        return data;
    }
}
Also used : AttributeDataset(smile.data.AttributeDataset) InputStreamReader(java.io.InputStreamReader) Attribute(smile.data.Attribute) NominalAttribute(smile.data.NominalAttribute) NumericAttribute(smile.data.NumericAttribute) DateAttribute(smile.data.DateAttribute) StringAttribute(smile.data.StringAttribute) ArrayList(java.util.ArrayList) Reader(java.io.Reader) BufferedReader(java.io.BufferedReader) IOException(java.io.IOException) StreamTokenizer(java.io.StreamTokenizer)

Example 9 with Attribute

use of smile.data.Attribute in project smile by haifengl.

the class LibsvmParser method parse.

/**
 * Parse a libsvm sparse dataset from an input stream.
 *
 * <p>The first non-blank line decides the kind of response: if its first token
 * parses as an integer the data is read as classification, otherwise, if it
 * parses as a double, as regression. Every non-blank line then contributes one
 * row: the first token is the response and each further {@code index:value}
 * token sets one cell, with the 1-based file index stored 0-based. Blank lines
 * are skipped and do not consume a row index. For classification, each label is
 * finally replaced by its position among the sorted values returned by
 * {@code Math.unique}. The reader wrapping {@code stream} is always closed.
 *
 * @param name the name of dataset.
 * @param stream the input stream of data.
 * @return the parsed sparse dataset.
 * @throws java.io.IOException if the stream contains no non-blank line; also
 *         declared for failures while reading the stream.
 * @throws ParseException declared by the signature; presumably raised when
 *         registering class labels on the response attribute.
 * @throws NumberFormatException if a response value or an {@code index:value}
 *         pair cannot be parsed.
 */
public SparseDataset parse(String name, InputStream stream) throws IOException, ParseException {
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream))) {
        // Skip leading blank lines so that they are not mistaken for data.
        String line = reader.readLine();
        while (line != null && line.trim().isEmpty()) {
            line = reader.readLine();
        }
        if (line == null) {
            throw new IOException("Empty data source.");
        }

        // Detect the response kind from the first data line.
        String[] tokens = line.trim().split("\\s+");
        boolean classification = true;
        Attribute response = null;
        try {
            Integer.valueOf(tokens[0]);
            response = new NominalAttribute("class");
        } catch (NumberFormatException e) {
            try {
                Double.valueOf(tokens[0]);
                // NOTE(review): the regression response is also created as a
                // NominalAttribute; confirm whether a numeric attribute type
                // was intended here.
                response = new NominalAttribute("response");
                classification = false;
            } catch (NumberFormatException ex) {
                logger.error("Failed to parse {}", tokens[0], ex);
                throw new NumberFormatException("Unrecognized response variable value: " + tokens[0]);
            }
        }

        SparseDataset sparse = new SparseDataset(name, response);
        int i = 0; // index of the next row to fill; advanced only for data lines
        while (line != null) {
            String row = line.trim();
            if (!row.isEmpty()) {
                tokens = row.split("\\s+");
                if (classification) {
                    int y = Integer.parseInt(tokens[0]);
                    sparse.set(i, y);
                } else {
                    double y = Double.parseDouble(tokens[0]);
                    sparse.set(i, y);
                }
                for (int k = 1; k < tokens.length; k++) {
                    String[] pair = tokens[k].split(":");
                    if (pair.length != 2) {
                        throw new NumberFormatException("Invalid data: " + tokens[k]);
                    }
                    // File indices are 1-based; the dataset is addressed 0-based.
                    int j = Integer.parseInt(pair[0]) - 1;
                    double x = Double.parseDouble(pair[1]);
                    sparse.set(i, j, x);
                }
                i++;
            }
            line = reader.readLine();
        }

        if (classification) {
            // Map the raw labels onto their position in the sorted label array.
            int n = sparse.size();
            int[] y = sparse.toArray(new int[n]);
            int[] label = Math.unique(y);
            Arrays.sort(label);
            for (int c : label) {
                response.valueOf(String.valueOf(c));
            }
            for (int r = 0; r < n; r++) {
                sparse.get(r).y = Arrays.binarySearch(label, y[r]);
            }
        }
        return sparse;
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) Attribute(smile.data.Attribute) NominalAttribute(smile.data.NominalAttribute) IOException(java.io.IOException) BufferedReader(java.io.BufferedReader) SparseDataset(smile.data.SparseDataset)

Example 10 with Attribute

use of smile.data.Attribute in project smile by haifengl.

the class SammonMappingDemo method learn.

/**
 * Runs Sammon's mapping on the currently selected dataset, once with 2 and once
 * with 3 target dimensions, and returns a Swing component showing the two
 * resulting scatter plots side by side.
 *
 * <p>Points are labelled with the dataset's string values; if the first of
 * those is {@code null}, the attribute names are used as labels instead. The
 * time taken by each mapping is printed to standard output.
 *
 * @return a panel with a 1x2 grid holding the k = 2 and k = 3 plots.
 */
public JComponent learn() {
    int sampleCount = dataset[datasetIndex].size();
    double[][] samples = dataset[datasetIndex].toArray(new double[sampleCount][]);
    String[] names = dataset[datasetIndex].toArray(new String[sampleCount]);

    // Fall back to attribute names when the dataset carries no labels.
    if (names[0] == null) {
        Attribute[] columns = dataset[datasetIndex].attributes();
        names = new String[columns.length];
        int slot = 0;
        for (Attribute column : columns) {
            names[slot++] = column.getName();
        }
    }

    JPanel pane = new JPanel(new GridLayout(1, 2));

    // First plot: k = 2.
    long started = System.currentTimeMillis();
    SammonMapping planar = new SammonMapping(samples, 2);
    long elapsed = System.currentTimeMillis() - started;
    System.out.format("Learn Sammon's Mapping (k=2) from %d samples in %dms\n", samples.length, elapsed);
    PlotCanvas planarPlot = ScatterPlot.plot(planar.getCoordinates(), names);
    planarPlot.setTitle("Sammon's Mapping (k = 2)");
    pane.add(planarPlot);

    // Second plot: k = 3.
    started = System.currentTimeMillis();
    SammonMapping spatial = new SammonMapping(samples, 3);
    elapsed = System.currentTimeMillis() - started;
    System.out.format("Learn Sammon's Mapping (k=3) from %d samples in %dms\n", samples.length, elapsed);
    PlotCanvas spatialPlot = ScatterPlot.plot(spatial.getCoordinates(), names);
    spatialPlot.setTitle("Sammon's Mapping (k = 3)");
    pane.add(spatialPlot);

    return pane;
}
Also used : JPanel(javax.swing.JPanel) GridLayout(java.awt.GridLayout) SammonMapping(smile.mds.SammonMapping) Attribute(smile.data.Attribute) PlotCanvas(smile.plot.PlotCanvas)

Aggregations

Attribute (smile.data.Attribute) 35, AttributeDataset (smile.data.AttributeDataset) 29, Test (org.junit.Test) 24, NominalAttribute (smile.data.NominalAttribute) 15, ArffParser (smile.data.parser.ArffParser) 8, DelimitedTextParser (smile.data.parser.DelimitedTextParser) 8, BufferedReader (java.io.BufferedReader) 7, InputStreamReader (java.io.InputStreamReader) 7, NumericAttribute (smile.data.NumericAttribute) 7, IOException (java.io.IOException) 6, Datum (smile.data.Datum) 4, GridLayout (java.awt.GridLayout) 3, ArrayList (java.util.ArrayList) 3, JPanel (javax.swing.JPanel) 3, DateAttribute (smile.data.DateAttribute) 3, StringAttribute (smile.data.StringAttribute) 3, PlotCanvas (smile.plot.PlotCanvas) 3, Reader (java.io.Reader) 2, StreamTokenizer (java.io.StreamTokenizer) 2, ParseException (java.text.ParseException) 1