use of smile.data.AttributeDataset in project smile by haifengl.
the class TXTParser method parse.
/**
 * Parse a TXT dataset from an input stream.
 * <p>
 * The first line is a tab-separated header. Column 0 holds the row names; if
 * the second header cell is "description" (case-insensitive), column 1 holds
 * row descriptions. Every remaining column becomes a numeric attribute named
 * after its header cell. In data rows an empty cell is stored as NaN.
 * <p>
 * The reader (and therefore the given stream) is closed before this method
 * returns, whether it completes normally or throws.
 *
 * @param name the name of dataset.
 * @param stream the input stream of data.
 * @return the parsed dataset.
 * @throws java.io.IOException if the source is empty, a data line does not
 *         have the same number of fields as the header, or reading fails.
 * @throws java.text.ParseException declared by the signature; nothing in
 *         this method body throws it explicitly.
 * @throws NumberFormatException if a non-empty data cell is not a valid double.
 */
public AttributeDataset parse(String name, InputStream stream) throws IOException, ParseException {
    // try-with-resources guarantees the reader is released on error paths too.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream))) {
        String line = reader.readLine();
        if (line == null) {
            throw new IOException("Empty data source.");
        }

        // Limit -1 keeps trailing empty fields so column counts stay exact.
        String[] tokens = line.split("\t", -1);

        // Data columns start after the name column and the optional description column.
        // The length guard avoids an out-of-bounds read on a single-column header.
        int start = 1;
        if (tokens.length > 1 && tokens[1].equalsIgnoreCase("description")) {
            start = 2;
        }
        int p = tokens.length - start;

        Attribute[] attributes = new Attribute[p];
        for (int i = 0; i < p; i++) {
            attributes[i] = new NumericAttribute(tokens[i + start]);
        }

        AttributeDataset data = new AttributeDataset(name, attributes);

        // i is the 1-based line number used in error messages (the header is line 1).
        for (int i = 2; (line = reader.readLine()) != null; i++) {
            tokens = line.split("\t", -1);
            if (tokens.length != p + start) {
                throw new IOException(String.format("Invalid number of elements of line %d: %d", i, tokens.length));
            }

            double[] x = new double[p];
            for (int j = 0; j < p; j++) {
                String cell = tokens[j + start];
                x[j] = cell.isEmpty() ? Double.NaN : Double.parseDouble(cell);
            }

            Datum<double[]> datum = new Datum<>(x);
            datum.name = tokens[0];
            if (start == 2) {
                datum.description = tokens[1];
            }
            data.add(datum);
        }

        return data;
    }
}
use of smile.data.AttributeDataset in project smile by haifengl.
the class ArffParserTest method testParseWeather.
/**
 * Test of parse method, of class ArffParser, on weka/weather.nominal.arff
 * with column 4 configured as the response.
 * <p>
 * Exceptions raised while loading or parsing are deliberately NOT caught
 * here: they propagate so that the test fails instead of passing silently.
 */
@Test
public void testParseWeather() throws Exception {
    System.out.println("weather");
    ArffParser arffParser = new ArffParser();
    arffParser.setResponseIndex(4);
    AttributeDataset weather = arffParser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/weather.nominal.arff"));
    double[][] x = weather.toArray(new double[weather.size()][]);
    int[] y = weather.toArray(new int[weather.size()]);

    // Schema: nominal response and nominal predictors.
    assertEquals(Attribute.Type.NOMINAL, weather.response().getType());
    for (Attribute attribute : weather.attributes()) {
        assertEquals(Attribute.Type.NOMINAL, attribute.getType());
    }
    assertEquals(14, weather.size());
    assertEquals(4, weather.attributes().length);

    // Leading rows.
    assertEquals("no", weather.response().toString(y[0]));
    assertEquals("no", weather.response().toString(y[1]));
    assertEquals("yes", weather.response().toString(y[2]));
    assertEquals("sunny", weather.attributes()[0].toString(x[0][0]));
    assertEquals("hot", weather.attributes()[1].toString(x[0][1]));
    assertEquals("high", weather.attributes()[2].toString(x[0][2]));
    assertEquals("FALSE", weather.attributes()[3].toString(x[0][3]));

    // Last row.
    assertEquals("no", weather.response().toString(y[13]));
    assertEquals("rainy", weather.attributes()[0].toString(x[13][0]));
    assertEquals("mild", weather.attributes()[1].toString(x[13][1]));
    assertEquals("high", weather.attributes()[2].toString(x[13][2]));
    assertEquals("TRUE", weather.attributes()[3].toString(x[13][3]));
}
use of smile.data.AttributeDataset in project smile by haifengl.
the class ArffParserTest method testParseIris.
/**
 * Test of parse method, of class ArffParser, on weka/iris.arff with
 * column 4 configured as the response.
 * <p>
 * Exceptions raised while loading or parsing are deliberately NOT caught
 * here: they propagate so that the test fails instead of passing silently.
 */
@Test
public void testParseIris() throws Exception {
    System.out.println("iris");
    ArffParser arffParser = new ArffParser();
    arffParser.setResponseIndex(4);
    AttributeDataset iris = arffParser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/iris.arff"));
    double[][] x = iris.toArray(new double[iris.size()][]);
    int[] y = iris.toArray(new int[iris.size()]);

    // Schema: nominal response, numeric predictors.
    assertEquals(Attribute.Type.NOMINAL, iris.response().getType());
    for (Attribute attribute : iris.attributes()) {
        assertEquals(Attribute.Type.NUMERIC, attribute.getType());
    }
    assertEquals(150, iris.size());
    assertEquals(4, iris.attributes().length);

    // Leading rows.
    assertEquals("Iris-setosa", iris.response().toString(y[0]));
    assertEquals("Iris-setosa", iris.response().toString(y[1]));
    assertEquals("Iris-setosa", iris.response().toString(y[2]));
    assertEquals(5.1, x[0][0], 1E-7);
    assertEquals(3.5, x[0][1], 1E-7);
    assertEquals(1.4, x[0][2], 1E-7);
    assertEquals(0.2, x[0][3], 1E-7);

    // Last row.
    assertEquals("Iris-virginica", iris.response().toString(y[149]));
    assertEquals(5.9, x[149][0], 1E-7);
    assertEquals(3.0, x[149][1], 1E-7);
    assertEquals(5.1, x[149][2], 1E-7);
    assertEquals(1.8, x[149][3], 1E-7);
}
use of smile.data.AttributeDataset in project smile by haifengl.
the class ArffParser method parse.
/**
 * Parse a dataset from given stream.
 * <p>
 * The header is read first to obtain the relation name and the declared
 * attributes. If the configured response index addresses one of them, that
 * attribute is split off as the response and the rest become the dataset's
 * attributes. Instances are then read until end of file. The reader wrapping
 * the stream is closed when this method exits.
 *
 * @param stream the input stream of data.
 * @return the parsed dataset.
 * @throws java.io.IOException if the header declares no attributes or reading fails.
 * @throws java.text.ParseException declared by the signature; presumably raised
 *         by the header/instance helpers on malformed input.
 */
public AttributeDataset parse(InputStream stream) throws IOException, ParseException {
    try (Reader input = new BufferedReader(new InputStreamReader(stream))) {
        StreamTokenizer tokens = new StreamTokenizer(input);
        initTokenizer(tokens);

        List<Attribute> columns = new ArrayList<>();
        String relation = readHeader(tokens, columns);
        if (columns.isEmpty()) {
            throw new IOException("no header information available");
        }

        // Snapshot of every declared column (response included), taken before
        // the response is removed; instances are parsed against this array.
        Attribute[] schema = columns.toArray(new Attribute[columns.size()]);

        // Split off the response column when the configured index is in range.
        Attribute target = null;
        final int targetIndex = responseIndex;
        if (targetIndex >= 0 && targetIndex < columns.size()) {
            target = columns.remove(targetIndex);
        }

        Attribute[] predictors = columns.toArray(new Attribute[columns.size()]);
        AttributeDataset dataset = new AttributeDataset(relation, predictors, target);

        // Read instances until the tokenizer reports end of file.
        getFirstToken(tokens);
        while (tokens.ttype != StreamTokenizer.TT_EOF) {
            if (tokens.ttype == '{') {
                // A leading '{' selects the sparse-instance reader.
                dataset.add(getSparseInstance(tokens, schema));
            } else {
                dataset.add(getInstance(tokens, schema));
            }
            getFirstToken(tokens);
        }

        // NOTE(review): the response attribute was removed from this list above,
        // so setOpen(false) is not applied to it here; confirm that is intended.
        for (Attribute column : columns) {
            if (column instanceof NominalAttribute) {
                ((NominalAttribute) column).setOpen(false);
            }
            if (column instanceof StringAttribute) {
                ((StringAttribute) column).setOpen(false);
            }
        }

        return dataset;
    }
}
use of smile.data.AttributeDataset in project smile by haifengl.
the class DelimitedTextParser method parse.
/**
 * Parse a dataset from a buffered reader.
 * <p>
 * Leading empty lines and lines starting with the comment prefix are skipped.
 * The first remaining line fixes the expected column count. When
 * {@code attributes} is null, numeric attributes named V1, V2, ... are
 * created for every column that is neither the row-name column nor the
 * response column. If column names are enabled, that first line supplies the
 * attribute (and response) names; otherwise it is parsed as the first data row.
 * Subsequent empty or comment lines are ignored.
 *
 * @param name the name of dataset.
 * @param attributes the list attributes of data in proper order, or null to
 *        generate numeric attributes from the first line.
 * @param reader the buffered reader for data.
 * @return the parsed dataset.
 * @throws java.io.IOException if there is no content line or reading fails.
 * @throws java.text.ParseException if the response index is beyond the
 *         columns of the first line, or a line has an unexpected column count.
 */
private AttributeDataset parse(String name, Attribute[] attributes, BufferedReader reader) throws IOException, ParseException {
    // Skip leading blank lines and comment lines.
    String line = reader.readLine();
    while (line != null && (line.isEmpty() || line.startsWith(comment))) {
        line = reader.readLine();
    }

    if (line == null) {
        throw new IOException("Empty data source.");
    }

    String[] s = line.split(delimiter, 0);

    if (attributes == null) {
        // Derive the number of predictor columns from the first line.
        int p = s.length;
        if (hasRowNames) {
            p--;
        }

        if (responseIndex >= s.length) {
            throw new ParseException("Invalid response variable index: " + responseIndex, responseIndex);
        }

        if (responseIndex >= 0) {
            p--;
        }

        attributes = new Attribute[p];
        for (int i = 0; i < p; i++) {
            attributes[i] = new NumericAttribute("V" + (i + 1));
        }
    }

    // Expected columns per line: predictors, plus optional row name and response.
    int ncols = attributes.length;
    int startColumn = 0;
    if (hasRowNames) {
        ncols++;
        startColumn = 1;
    }

    if (responseIndex >= 0) {
        ncols++;
    }

    if (ncols != s.length) {
        throw new ParseException(String.format("%d columns, expected %d", s.length, ncols), s.length);
    }

    AttributeDataset data = new AttributeDataset(name, attributes, response);

    if (hasColumnNames) {
        // The first line is a header: assign names instead of parsing values.
        for (int i = startColumn, k = 0; i < s.length; i++) {
            if (i != responseIndex) {
                attributes[k++].setName(s[i]);
            } else {
                response.setName(s[i]);
            }
        }
    } else {
        // No header: the first line is already a data row.
        data.add(toDatum(s, attributes, startColumn));
    }

    while ((line = reader.readLine()) != null) {
        if (line.isEmpty() || line.startsWith(comment)) {
            continue;
        }

        s = line.split(delimiter, 0);
        if (s.length != ncols) {
            throw new ParseException(String.format("%d columns, expected %d", s.length, ncols), s.length);
        }

        data.add(toDatum(s, attributes, startColumn));
    }

    return data;
}

/**
 * Converts one split line into a datum: the response column goes to y,
 * fields equal to the missing-value marker become NaN, and every other
 * field is converted by the attribute at the same predictor position.
 *
 * @param s the fields of one line.
 * @param attributes the predictor attributes in column order.
 * @param startColumn the index of the first non-row-name column.
 * @return the datum, named after the first field when row names are enabled.
 * @throws java.text.ParseException presumably raised by the attribute
 *         conversion on an invalid field value.
 */
private Datum<double[]> toDatum(String[] s, Attribute[] attributes, int startColumn) throws ParseException {
    String rowName = hasRowNames ? s[0] : null;
    double[] x = new double[attributes.length];
    double y = Double.NaN;

    for (int i = startColumn, k = 0; i < s.length; i++) {
        if (i == responseIndex) {
            y = response.valueOf(s[i]);
        } else if (missing != null && missing.equalsIgnoreCase(s[i])) {
            x[k++] = Double.NaN;
        } else {
            x[k] = attributes[k].valueOf(s[i]);
            k++;
        }
    }

    Datum<double[]> datum = new Datum<>(x, y);
    datum.name = rowName;
    return datum;
}
Aggregations