Search in sources:

Example 1 with Attribute

use of smile.data.Attribute in project smile by haifengl.

the class GCTParserTest method testParse.

/**
 * Test of parse method, of class GCTParser.
 * <p>
 * Parses the bundled {@code microarray/allaml.dataset.gct} file and checks
 * the dataset dimensions, the attribute types, and a few sample values at
 * the first and last rows.
 * <p>
 * Exceptions raised while locating or parsing the file are deliberately not
 * caught here: they propagate to the test runner so that a parser failure
 * fails the test instead of being printed and ignored.
 *
 * @throws Exception if the test data cannot be located or parsed.
 */
@Test
public void testParse() throws Exception {
    System.out.println("parse");
    GCTParser parser = new GCTParser();
    AttributeDataset data = parser.parse("GCT", smile.data.parser.IOUtils.getTestDataFile("microarray/allaml.dataset.gct"));
    double[][] x = data.toArray(new double[data.size()][]);
    String[] id = data.toArray(new String[data.size()]);

    // Every column of the dataset is expected to be numeric.
    for (Attribute attribute : data.attributes()) {
        assertEquals(Attribute.Type.NUMERIC, attribute.getType());
        System.out.println(attribute.getName());
    }

    assertEquals(12564, data.size());
    assertEquals(48, data.attributes().length);

    // First row.
    assertEquals("AFFX-MurIL2_at", id[0]);
    assertEquals(-161.8, x[0][0], 1E-7);
    assertEquals(-231.0, x[0][1], 1E-7);
    assertEquals(-279.0, x[0][2], 1E-7);

    // Last row, last three columns.
    assertEquals("128_at", id[12563]);
    assertEquals(95.0, x[12563][45], 1E-7);
    assertEquals(108.0, x[12563][46], 1E-7);
    assertEquals(346.0, x[12563][47], 1E-7);
}
Also used : AttributeDataset(smile.data.AttributeDataset) Attribute(smile.data.Attribute) Test(org.junit.Test)

Example 2 with Attribute

use of smile.data.Attribute in project smile by haifengl.

the class GCTParser method parse.

/**
 * Parse a GCT dataset from an input stream.
 * <p>
 * The layout enforced by this method is:
 * <ol>
 * <li>a first line equal to {@code #1.2};</li>
 * <li>a second line holding two tab-separated integers, the number of data
 * rows followed by the number of data columns, both strictly positive;</li>
 * <li>a tab-separated title line with two leading columns followed by one
 * name per data column;</li>
 * <li>one tab-separated line per data row: row name, row description, then
 * one value per data column. An empty value is stored as
 * {@code Double.NaN}.</li>
 * </ol>
 * The reader wrapping {@code stream} is closed before this method returns,
 * whether parsing succeeds or fails.
 *
 * @param name the name of dataset.
 * @param stream the input stream of data.
 * @return the dataset built from the stream, one datum per data row.
 * @throws java.io.IOException if reading fails or the content does not
 *         match the layout described above.
 * @throws ParseException declared by the signature; no statement in this
 *         method throws it directly.
 * @throws NumberFormatException if a size field or a non-empty data value
 *         is not a valid number.
 */
public AttributeDataset parse(String name, InputStream stream) throws IOException, ParseException {
    // try-with-resources guarantees the reader is closed on every exit
    // path, including the validation failures below.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream))) {
        String line = reader.readLine();
        if (line == null) {
            throw new IOException("Empty data source.");
        }
        if (!line.equals("#1.2")) {
            throw new IOException("Invalid version.");
        }

        // Size line: <rows> TAB <columns>.
        line = reader.readLine();
        if (line == null) {
            throw new IOException("Premature end of file.");
        }
        String[] tokens = line.split("\t", -1);
        if (tokens.length != 2) {
            throw new IOException("Invalid data size inforamation.");
        }
        int n = Integer.parseInt(tokens[0]);
        int p = Integer.parseInt(tokens[1]);
        if (n <= 0 || p <= 0) {
            throw new IOException(String.format("Invalid data size %d x %d.", n, p));
        }

        // Title line: two leading columns, then one name per data column.
        Attribute[] attributes = new Attribute[p];
        line = reader.readLine();
        if (line == null) {
            throw new IOException("Premature end of file.");
        }
        tokens = line.split("\t", -1);
        if (tokens.length != p + 2) {
            throw new IOException("Invalid title header.");
        }
        for (int i = 0; i < p; i++) {
            attributes[i] = new NumericAttribute(tokens[i + 2]);
        }

        AttributeDataset data = new AttributeDataset(name, attributes);
        for (int i = 0; i < n; i++) {
            line = reader.readLine();
            if (line == null) {
                throw new IOException("Premature end of file.");
            }
            tokens = line.split("\t", -1);
            if (tokens.length != p + 2) {
                // Three lines precede the data, so data row i is file line i + 4.
                throw new IOException(String.format("Invalid number of elements of line %d: %d", i + 4, tokens.length));
            }
            double[] x = new double[p];
            for (int j = 0; j < p; j++) {
                String value = tokens[j + 2];
                // An empty cell denotes a missing value.
                x[j] = value.isEmpty() ? Double.NaN : Double.parseDouble(value);
            }
            Datum<double[]> datum = new Datum<>(x);
            datum.name = tokens[0];
            datum.description = tokens[1];
            data.add(datum);
        }
        return data;
    }
}
Also used : AttributeDataset(smile.data.AttributeDataset) Datum(smile.data.Datum) InputStreamReader(java.io.InputStreamReader) Attribute(smile.data.Attribute) NumericAttribute(smile.data.NumericAttribute) IOException(java.io.IOException) NumericAttribute(smile.data.NumericAttribute) BufferedReader(java.io.BufferedReader)

Example 3 with Attribute

use of smile.data.Attribute in project smile by haifengl.

the class PCLParser method parse.

/**
 * Parse a PCL dataset from an input stream.
 * <p>
 * The layout enforced by this method is:
 * <ol>
 * <li>a tab-separated header line with three leading columns followed by
 * one name per data column;</li>
 * <li>a tab-separated weight line with the same number of columns as the
 * header; the value under each data column is used as that attribute's
 * weight;</li>
 * <li>any number of tab-separated data lines with the same number of
 * columns: row name, row description, row weight, then one value per data
 * column. An empty value is stored as {@code Double.NaN}.</li>
 * </ol>
 * The reader wrapping {@code stream} is closed before this method returns,
 * whether parsing succeeds or fails.
 *
 * @param name the name of dataset.
 * @param stream the input stream of data.
 * @return the dataset built from the stream, one datum per data line.
 * @throws java.io.IOException if reading fails or the content does not
 *         match the layout described above.
 * @throws ParseException declared by the signature; no statement in this
 *         method throws it directly.
 * @throws NumberFormatException if a weight or a non-empty data value is
 *         not a valid number.
 */
public AttributeDataset parse(String name, InputStream stream) throws IOException, ParseException {
    // try-with-resources guarantees the reader is closed on every exit
    // path, including the validation failures below.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream))) {
        String line = reader.readLine();
        if (line == null) {
            throw new IOException("Empty data source.");
        }
        String[] tokens = line.split("\t", -1);
        if (tokens.length < 3) {
            // Without this check p would be negative and the array
            // allocation below would fail with NegativeArraySizeException.
            throw new IOException(String.format("Invalid header: expected at least 3 columns but found %d.", tokens.length));
        }
        int p = tokens.length - 3;

        line = reader.readLine();
        if (line == null) {
            throw new IOException("Premature end of file.");
        }
        String[] weight = line.split("\t", -1);
        if (weight.length != tokens.length) {
            throw new IOException("Invalid sample weight header.");
        }

        Attribute[] attributes = new Attribute[p];
        for (int i = 0; i < p; i++) {
            attributes[i] = new NumericAttribute(tokens[i + 3], null, Double.valueOf(weight[i + 3]));
        }

        AttributeDataset data = new AttributeDataset(name, attributes);
        // Two lines precede the data, so the first data line is file line 3.
        for (int i = 3; (line = reader.readLine()) != null; i++) {
            tokens = line.split("\t", -1);
            if (tokens.length != weight.length) {
                throw new IOException(String.format("Invalid number of elements of line %d: %d", i, tokens.length));
            }
            double[] x = new double[p];
            for (int j = 0; j < p; j++) {
                String value = tokens[j + 3];
                // An empty cell denotes a missing value.
                x[j] = value.isEmpty() ? Double.NaN : Double.parseDouble(value);
            }
            Datum<double[]> datum = new Datum<>(x);
            datum.name = tokens[0];
            datum.description = tokens[1];
            datum.weight = Double.valueOf(tokens[2]);
            data.add(datum);
        }
        return data;
    }
}
Also used : AttributeDataset(smile.data.AttributeDataset) Datum(smile.data.Datum) InputStreamReader(java.io.InputStreamReader) Attribute(smile.data.Attribute) NumericAttribute(smile.data.NumericAttribute) IOException(java.io.IOException) NumericAttribute(smile.data.NumericAttribute) BufferedReader(java.io.BufferedReader)

Example 4 with Attribute

use of smile.data.Attribute in project smile by haifengl.

the class TXTParser method parse.

/**
 * Parse a TXT dataset from an input stream.
 * <p>
 * The first line is a tab-separated header. Its first column heads the row
 * names. If its second column equals {@code description} (ignoring case),
 * that column holds row descriptions and the data columns start at the
 * third column; otherwise the data columns start at the second column.
 * Every following line must have the same number of tab-separated columns
 * as the header. An empty value is stored as {@code Double.NaN}.
 * <p>
 * The reader wrapping {@code stream} is closed before this method returns,
 * whether parsing succeeds or fails.
 *
 * @param name the name of dataset.
 * @param stream the input stream of data.
 * @return the dataset built from the stream, one datum per data line.
 * @throws java.io.IOException if reading fails, the stream is empty, or a
 *         data line has a different number of columns than the header.
 * @throws ParseException declared by the signature; no statement in this
 *         method throws it directly.
 * @throws NumberFormatException if a non-empty data value is not a valid
 *         number.
 */
public AttributeDataset parse(String name, InputStream stream) throws IOException, ParseException {
    // try-with-resources guarantees the reader is closed on every exit
    // path, including the validation failures below.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream))) {
        String line = reader.readLine();
        if (line == null) {
            throw new IOException("Empty data source.");
        }
        String[] tokens = line.split("\t", -1);

        // The length guard keeps a single-column header from failing with
        // ArrayIndexOutOfBoundsException on tokens[1].
        boolean hasDescription = tokens.length > 1 && tokens[1].equalsIgnoreCase("description");
        int start = hasDescription ? 2 : 1;
        int p = tokens.length - start;

        Attribute[] attributes = new Attribute[p];
        for (int i = 0; i < p; i++) {
            attributes[i] = new NumericAttribute(tokens[i + start]);
        }

        AttributeDataset data = new AttributeDataset(name, attributes);
        // The header is file line 1, so the first data line is file line 2.
        for (int i = 2; (line = reader.readLine()) != null; i++) {
            tokens = line.split("\t", -1);
            if (tokens.length != p + start) {
                throw new IOException(String.format("Invalid number of elements of line %d: %d", i, tokens.length));
            }
            double[] x = new double[p];
            for (int j = 0; j < p; j++) {
                String value = tokens[j + start];
                // An empty cell denotes a missing value.
                x[j] = value.isEmpty() ? Double.NaN : Double.parseDouble(value);
            }
            Datum<double[]> datum = new Datum<>(x);
            datum.name = tokens[0];
            if (hasDescription) {
                datum.description = tokens[1];
            }
            data.add(datum);
        }
        return data;
    }
}
Also used : AttributeDataset(smile.data.AttributeDataset) Datum(smile.data.Datum) InputStreamReader(java.io.InputStreamReader) Attribute(smile.data.Attribute) NumericAttribute(smile.data.NumericAttribute) IOException(java.io.IOException) NumericAttribute(smile.data.NumericAttribute) BufferedReader(java.io.BufferedReader)

Example 5 with Attribute

use of smile.data.Attribute in project smile by haifengl.

the class ArffParserTest method testParseWeather.

/**
 * Test of parse method, of class ArffParser.
 * <p>
 * Parses the bundled {@code weka/weather.nominal.arff} file with the
 * response index set to 4 and checks the dataset dimensions, the attribute
 * and response types, and the string form of a few values at the first and
 * last rows.
 * <p>
 * Exceptions raised while locating or parsing the file are deliberately not
 * caught here: they propagate to the test runner so that a parser failure
 * fails the test instead of being printed and ignored.
 *
 * @throws Exception if the test data cannot be located or parsed.
 */
@Test
public void testParseWeather() throws Exception {
    System.out.println("weather");
    ArffParser arffParser = new ArffParser();
    arffParser.setResponseIndex(4);
    AttributeDataset weather = arffParser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/weather.nominal.arff"));
    double[][] x = weather.toArray(new double[weather.size()][]);
    int[] y = weather.toArray(new int[weather.size()]);

    // The response and every attribute are expected to be nominal.
    assertEquals(Attribute.Type.NOMINAL, weather.response().getType());
    for (Attribute attribute : weather.attributes()) {
        assertEquals(Attribute.Type.NOMINAL, attribute.getType());
    }

    assertEquals(14, weather.size());
    assertEquals(4, weather.attributes().length);

    // Responses of the first three rows.
    assertEquals("no", weather.response().toString(y[0]));
    assertEquals("no", weather.response().toString(y[1]));
    assertEquals("yes", weather.response().toString(y[2]));

    // Attribute values of the first row.
    assertEquals("sunny", weather.attributes()[0].toString(x[0][0]));
    assertEquals("hot", weather.attributes()[1].toString(x[0][1]));
    assertEquals("high", weather.attributes()[2].toString(x[0][2]));
    assertEquals("FALSE", weather.attributes()[3].toString(x[0][3]));

    // Response and attribute values of the last row.
    assertEquals("no", weather.response().toString(y[13]));
    assertEquals("rainy", weather.attributes()[0].toString(x[13][0]));
    assertEquals("mild", weather.attributes()[1].toString(x[13][1]));
    assertEquals("high", weather.attributes()[2].toString(x[13][2]));
    assertEquals("TRUE", weather.attributes()[3].toString(x[13][3]));
}
Also used : AttributeDataset(smile.data.AttributeDataset) Attribute(smile.data.Attribute) Test(org.junit.Test)

Aggregations

Attribute (smile.data.Attribute): 35; AttributeDataset (smile.data.AttributeDataset): 29; Test (org.junit.Test): 24; NominalAttribute (smile.data.NominalAttribute): 15; ArffParser (smile.data.parser.ArffParser): 8; DelimitedTextParser (smile.data.parser.DelimitedTextParser): 8; BufferedReader (java.io.BufferedReader): 7; InputStreamReader (java.io.InputStreamReader): 7; NumericAttribute (smile.data.NumericAttribute): 7; IOException (java.io.IOException): 6; Datum (smile.data.Datum): 4; GridLayout (java.awt.GridLayout): 3; ArrayList (java.util.ArrayList): 3; JPanel (javax.swing.JPanel): 3; DateAttribute (smile.data.DateAttribute): 3; StringAttribute (smile.data.StringAttribute): 3; PlotCanvas (smile.plot.PlotCanvas): 3; Reader (java.io.Reader): 2; StreamTokenizer (java.io.StreamTokenizer): 2; ParseException (java.text.ParseException): 1