Search in sources :

Example 16 with AttributeDataset

use of smile.data.AttributeDataset in project smile by haifengl.

the class TXTParser method parse.

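/**
 * Parse a TXT dataset from an input stream.
 * @param name the name of dataset.
 * @param stream the input stream of data.
 * @return the parsed dataset.
 * @throws java.io.IOException if reading fails, the stream is empty, or a
 *         data line does not have the same number of tab-separated fields
 *         as the header line.
 */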
public AttributeDataset parse(String name, InputStream stream) throws IOException, ParseException {
    // try-with-resources closes the reader (and the stream it wraps) on every
    // exit path, including the IOExceptions thrown below for malformed input.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream))) {
        String line = reader.readLine();
        if (line == null) {
            throw new IOException("Empty data source.");
        }

        // Limit -1 keeps trailing empty fields, so an empty last column still counts.
        String[] tokens = line.split("\t", -1);

        // Column 0 is the row name. A second header column titled "description"
        // (case-insensitive) marks an extra per-row description column.
        // The length guard avoids an ArrayIndexOutOfBoundsException on a
        // header that consists of a single column.
        boolean hasDescription = tokens.length > 1 && tokens[1].equalsIgnoreCase("description");
        int start = hasDescription ? 2 : 1;
        int p = tokens.length - start;

        // Every remaining header column becomes a numeric attribute.
        Attribute[] attributes = new Attribute[p];
        for (int i = 0; i < p; i++) {
            attributes[i] = new NumericAttribute(tokens[i + start]);
        }

        AttributeDataset data = new AttributeDataset(name, attributes);

        // i is the 1-based line number used in error messages; the header was line 1.
        for (int i = 2; (line = reader.readLine()) != null; i++) {
            tokens = line.split("\t", -1);
            if (tokens.length != p + start) {
                throw new IOException(String.format("Invalid number of elements of line %d: %d", i, tokens.length));
            }

            double[] x = new double[p];
            for (int j = 0; j < p; j++) {
                String field = tokens[j + start];
                // An empty field is stored as NaN.
                x[j] = field.isEmpty() ? Double.NaN : Double.parseDouble(field);
            }

            Datum<double[]> datum = new Datum<>(x);
            datum.name = tokens[0];
            if (hasDescription) {
                datum.description = tokens[1];
            }
            data.add(datum);
        }

        return data;
    }
}
Also used : AttributeDataset(smile.data.AttributeDataset) Datum(smile.data.Datum) InputStreamReader(java.io.InputStreamReader) Attribute(smile.data.Attribute) NumericAttribute(smile.data.NumericAttribute) IOException(java.io.IOException) NumericAttribute(smile.data.NumericAttribute) BufferedReader(java.io.BufferedReader)

Example 17 with AttributeDataset

use of smile.data.AttributeDataset in project smile by haifengl.

the class ArffParserTest method testParseWeather.

/**
     * Test of parse method, of class ArffParser.
     */
@Test
public void testParseWeather() throws Exception {
    System.out.println("weather");

    // Deliberately no try/catch: any exception from loading or parsing the
    // file must propagate so that the test fails instead of being printed
    // and silently ignored.
    ArffParser arffParser = new ArffParser();
    arffParser.setResponseIndex(4);
    AttributeDataset weather = arffParser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/weather.nominal.arff"));
    double[][] x = weather.toArray(new double[weather.size()][]);
    int[] y = weather.toArray(new int[weather.size()]);

    // Response and all predictors are expected to be nominal.
    assertEquals(Attribute.Type.NOMINAL, weather.response().getType());
    for (Attribute attribute : weather.attributes()) {
        assertEquals(Attribute.Type.NOMINAL, attribute.getType());
    }

    assertEquals(14, weather.size());
    assertEquals(4, weather.attributes().length);

    // First rows.
    assertEquals("no", weather.response().toString(y[0]));
    assertEquals("no", weather.response().toString(y[1]));
    assertEquals("yes", weather.response().toString(y[2]));
    assertEquals("sunny", weather.attributes()[0].toString(x[0][0]));
    assertEquals("hot", weather.attributes()[1].toString(x[0][1]));
    assertEquals("high", weather.attributes()[2].toString(x[0][2]));
    assertEquals("FALSE", weather.attributes()[3].toString(x[0][3]));

    // Last row.
    assertEquals("no", weather.response().toString(y[13]));
    assertEquals("rainy", weather.attributes()[0].toString(x[13][0]));
    assertEquals("mild", weather.attributes()[1].toString(x[13][1]));
    assertEquals("high", weather.attributes()[2].toString(x[13][2]));
    assertEquals("TRUE", weather.attributes()[3].toString(x[13][3]));
}
Also used : AttributeDataset(smile.data.AttributeDataset) Attribute(smile.data.Attribute) Test(org.junit.Test)

Example 18 with AttributeDataset

use of smile.data.AttributeDataset in project smile by haifengl.

the class ArffParserTest method testParseIris.

/**
     * Test of parse method, of class ArffParser.
     */
@Test
public void testParseIris() throws Exception {
    System.out.println("iris");

    // Deliberately no try/catch: any exception from loading or parsing the
    // file must propagate so that the test fails instead of being printed
    // and silently ignored.
    ArffParser arffParser = new ArffParser();
    arffParser.setResponseIndex(4);
    AttributeDataset iris = arffParser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/iris.arff"));
    double[][] x = iris.toArray(new double[iris.size()][]);
    int[] y = iris.toArray(new int[iris.size()]);

    // The response is expected to be nominal, the predictors numeric.
    assertEquals(Attribute.Type.NOMINAL, iris.response().getType());
    for (Attribute attribute : iris.attributes()) {
        assertEquals(Attribute.Type.NUMERIC, attribute.getType());
    }

    assertEquals(150, iris.size());
    assertEquals(4, iris.attributes().length);

    // First rows.
    assertEquals("Iris-setosa", iris.response().toString(y[0]));
    assertEquals("Iris-setosa", iris.response().toString(y[1]));
    assertEquals("Iris-setosa", iris.response().toString(y[2]));
    assertEquals(5.1, x[0][0], 1E-7);
    assertEquals(3.5, x[0][1], 1E-7);
    assertEquals(1.4, x[0][2], 1E-7);
    assertEquals(0.2, x[0][3], 1E-7);

    // Last row.
    assertEquals("Iris-virginica", iris.response().toString(y[149]));
    assertEquals(5.9, x[149][0], 1E-7);
    assertEquals(3.0, x[149][1], 1E-7);
    assertEquals(5.1, x[149][2], 1E-7);
    assertEquals(1.8, x[149][3], 1E-7);
}
Also used : AttributeDataset(smile.data.AttributeDataset) Attribute(smile.data.Attribute) Test(org.junit.Test)

Example 19 with AttributeDataset

use of smile.data.AttributeDataset in project smile by haifengl.

the class ArffParser method parse.

/**
     * Parse a dataset from given stream.
     */
public AttributeDataset parse(InputStream stream) throws IOException, ParseException {
    try (Reader input = new BufferedReader(new InputStreamReader(stream))) {
        StreamTokenizer tokenizer = new StreamTokenizer(input);
        initTokenizer(tokenizer);

        // readHeader fills the list with the declared attributes and
        // returns the relation name.
        List<Attribute> attributes = new ArrayList<>();
        String relationName = readHeader(tokenizer, attributes);
        if (attributes.isEmpty()) {
            throw new IOException("no header information available");
        }

        // Snapshot of every declared attribute, taken before the response
        // is removed from the list; the instance readers receive this array.
        Attribute[] attr = attributes.toArray(new Attribute[attributes.size()]);

        // Pull the response attribute out of the predictor list when
        // responseIndex addresses one of the declared attributes.
        Attribute response = null;
        for (int i = 0; i < attributes.size(); i++) {
            if (responseIndex != i) {
                continue;
            }
            response = attributes.remove(i);
            break;
        }

        Attribute[] predictors = attributes.toArray(new Attribute[attributes.size()]);
        AttributeDataset data = new AttributeDataset(relationName, predictors, response);

        // Read instances until the tokenizer reports end of file.
        for (getFirstToken(tokenizer); tokenizer.ttype != StreamTokenizer.TT_EOF; getFirstToken(tokenizer)) {
            // A leading '{' selects the sparse instance reader.
            if (tokenizer.ttype == '{') {
                data.add(getSparseInstance(tokenizer, attr));
            } else {
                data.add(getInstance(tokenizer, attr));
            }
        }

        // Close nominal and string attributes once all data is read.
        // This walks the predictor list only; a response attribute removed
        // above is not part of it.
        for (Attribute attribute : attributes) {
            if (attribute instanceof NominalAttribute) {
                ((NominalAttribute) attribute).setOpen(false);
            }
            if (attribute instanceof StringAttribute) {
                ((StringAttribute) attribute).setOpen(false);
            }
        }

        return data;
    }
}
Also used : AttributeDataset(smile.data.AttributeDataset) InputStreamReader(java.io.InputStreamReader) Attribute(smile.data.Attribute) NominalAttribute(smile.data.NominalAttribute) NumericAttribute(smile.data.NumericAttribute) DateAttribute(smile.data.DateAttribute) StringAttribute(smile.data.StringAttribute) ArrayList(java.util.ArrayList) StringAttribute(smile.data.StringAttribute) Reader(java.io.Reader) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) IOException(java.io.IOException) NominalAttribute(smile.data.NominalAttribute) BufferedReader(java.io.BufferedReader) StreamTokenizer(java.io.StreamTokenizer)

Example 20 with AttributeDataset

use of smile.data.AttributeDataset in project smile by haifengl.

the class DelimitedTextParser method parse.

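/**
 * Parse a dataset from a buffered reader.
 * @param name the name of dataset.
 * @param attributes the list of attributes of data in proper order, or
 *        null to generate numeric attributes named V1, V2, ... from the
 *        first data line.
 * @param reader the buffered reader for data.
 * @return the parsed dataset.
 * @throws java.io.IOException if reading fails or the source contains no
 *         line other than empty lines and comments.
 * @throws java.text.ParseException if a line does not have the expected
 *         number of columns, or if attributes are generated and the
 *         response index is beyond the last column.
 */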
private AttributeDataset parse(String name, Attribute[] attributes, BufferedReader reader) throws IOException, ParseException {
    // Skip leading empty lines and comment lines.
    String line = reader.readLine();
    while (line != null && (line.isEmpty() || line.startsWith(comment))) {
        line = reader.readLine();
    }
    if (line == null) {
        throw new IOException("Empty data source.");
    }

    String[] s = line.split(delimiter, 0);

    // No attributes supplied: derive their number from the first line and
    // create numeric attributes V1..Vp.
    if (attributes == null) {
        int p = s.length;
        if (hasRowNames) {
            p--;
        }
        if (responseIndex >= s.length) {
            throw new ParseException("Invalid response variable index: " + responseIndex, responseIndex);
        }
        if (responseIndex >= 0) {
            p--;
        }
        attributes = new Attribute[p];
        for (int i = 0; i < p; i++) {
            attributes[i] = new NumericAttribute("V" + (i + 1));
        }
    }

    // Expected columns per line: attributes, plus the optional row-name
    // column, plus the optional response column.
    int ncols = attributes.length;
    int startColumn = 0;
    if (hasRowNames) {
        ncols++;
        startColumn = 1;
    }
    if (responseIndex >= 0) {
        ncols++;
    }
    if (ncols != s.length) {
        throw new ParseException(String.format("%d columns, expected %d", s.length, ncols), s.length);
    }

    AttributeDataset data = new AttributeDataset(name, attributes, response);

    if (hasColumnNames) {
        // The first line is a header: use it to name the attributes and the response.
        for (int i = startColumn, k = 0; i < s.length; i++) {
            if (i != responseIndex) {
                attributes[k++].setName(s[i]);
            } else {
                response.setName(s[i]);
            }
        }
    } else {
        // The first line is already data.
        data.add(toDatum(s, attributes, startColumn));
    }

    while ((line = reader.readLine()) != null) {
        if (line.isEmpty() || line.startsWith(comment)) {
            continue;
        }
        s = line.split(delimiter, 0);
        if (s.length != ncols) {
            throw new ParseException(String.format("%d columns, expected %d", s.length, ncols), s.length);
        }
        data.add(toDatum(s, attributes, startColumn));
    }

    return data;
}

/**
 * Converts the fields of one data line into a datum.
 * @param s the fields of the line; its length has already been validated.
 * @param attributes the attributes used to convert the non-response fields.
 * @param startColumn the index of the first field that is not the row name.
 * @return the datum holding the attribute values, the response value
 *         (NaN when no field is the response) and the row name
 *         (null when the data has no row names).
 */
private Datum<double[]> toDatum(String[] s, Attribute[] attributes, int startColumn) throws ParseException {
    double[] x = new double[attributes.length];
    double y = Double.NaN;
    // i walks the fields of the line, k the attribute slots; the response
    // field consumes a field but no attribute slot.
    for (int i = startColumn, k = 0; i < s.length; i++) {
        if (i == responseIndex) {
            y = response.valueOf(s[i]);
        } else if (missing != null && missing.equalsIgnoreCase(s[i])) {
            // A field equal to the missing-value marker is stored as NaN.
            x[k++] = Double.NaN;
        } else {
            x[k] = attributes[k].valueOf(s[i]);
            k++;
        }
    }
    Datum<double[]> datum = new Datum<>(x, y);
    datum.name = hasRowNames ? s[0] : null;
    return datum;
}
Also used : AttributeDataset(smile.data.AttributeDataset) Datum(smile.data.Datum) IOException(java.io.IOException) ParseException(java.text.ParseException) NumericAttribute(smile.data.NumericAttribute)

Aggregations

AttributeDataset (smile.data.AttributeDataset)140 Test (org.junit.Test)125 ArffParser (smile.data.parser.ArffParser)75 NominalAttribute (smile.data.NominalAttribute)50 DelimitedTextParser (smile.data.parser.DelimitedTextParser)48 Attribute (smile.data.Attribute)29 EuclideanDistance (smile.math.distance.EuclideanDistance)19 LOOCV (smile.validation.LOOCV)18 CrossValidation (smile.validation.CrossValidation)17 AdjustedRandIndex (smile.validation.AdjustedRandIndex)14 RandIndex (smile.validation.RandIndex)14 ClassifierTrainer (smile.classification.ClassifierTrainer)13 GaussianKernel (smile.math.kernel.GaussianKernel)11 IOException (java.io.IOException)10 RadialBasisFunction (smile.math.rbf.RadialBasisFunction)9 RBFNetwork (smile.regression.RBFNetwork)8 ArrayList (java.util.ArrayList)6 KMeans (smile.clustering.KMeans)6 Datum (smile.data.Datum)6 NumericAttribute (smile.data.NumericAttribute)6