use of smile.data.Attribute in project smile by haifengl.
the class GCTParserTest method testParse.
/**
 * Test of parse method, of class GCTParser.
 *
 * <p>Parses the {@code microarray/allaml.dataset.gct} test data file and checks
 * the attribute types, the dataset dimensions, and the first and last rows.
 * Exceptions raised while loading or parsing the file are deliberately NOT
 * caught here: they propagate to the test runner so that a broken parser makes
 * the test fail instead of passing silently.
 */
@Test
public void testParse() throws Exception {
    System.out.println("parse");
    GCTParser parser = new GCTParser();
    AttributeDataset data = parser.parse("GCT", smile.data.parser.IOUtils.getTestDataFile("microarray/allaml.dataset.gct"));

    double[][] x = data.toArray(new double[data.size()][]);
    String[] id = data.toArray(new String[data.size()]);

    // Every attribute produced by the parser is expected to be numeric.
    for (Attribute attribute : data.attributes()) {
        assertEquals(Attribute.Type.NUMERIC, attribute.getType());
        System.out.println(attribute.getName());
    }

    // Dataset dimensions.
    assertEquals(12564, data.size());
    assertEquals(48, data.attributes().length);

    // First row.
    assertEquals("AFFX-MurIL2_at", id[0]);
    assertEquals(-161.8, x[0][0], 1E-7);
    assertEquals(-231.0, x[0][1], 1E-7);
    assertEquals(-279.0, x[0][2], 1E-7);

    // Last row.
    assertEquals("128_at", id[12563]);
    assertEquals(95.0, x[12563][45], 1E-7);
    assertEquals(108.0, x[12563][46], 1E-7);
    assertEquals(346.0, x[12563][47], 1E-7);
}
use of smile.data.Attribute in project smile by haifengl.
the class GCTParser method parse.
/**
 * Parse a GCT dataset from an input stream.
 *
 * <p>The expected layout, as enforced by this method, is:
 * <ol>
 * <li>a version line that must be exactly {@code #1.2};</li>
 * <li>a line with two tab-separated integers: the number of data rows
 *     {@code n} and the number of data columns {@code p}, both positive;</li>
 * <li>a tab-separated title line with {@code p + 2} fields, of which the last
 *     {@code p} become the names of the numeric attributes;</li>
 * <li>{@code n} tab-separated data lines with {@code p + 2} fields each: the
 *     row name, the row description, and {@code p} values. An empty value is
 *     stored as {@code NaN}.</li>
 * </ol>
 * Anything after the {@code n}-th data line is not read.
 *
 * <p>The reader wrapping {@code stream} is closed when this method returns,
 * whether normally or by throwing.
 *
 * @param name the name of dataset.
 * @param stream the input stream of data.
 * @return the parsed dataset containing {@code n} rows of {@code p} numeric attributes.
 * @throws java.io.IOException if the stream cannot be read, ends prematurely,
 *         or does not follow the layout described above.
 * @throws NumberFormatException if a size or data field is not a valid number.
 */
public AttributeDataset parse(String name, InputStream stream) throws IOException, ParseException {
    // try-with-resources: the reader used to be closed only on the success path,
    // leaking the stream whenever one of the validation checks below threw.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream))) {
        String line = reader.readLine();
        if (line == null) {
            throw new IOException("Empty data source.");
        }
        if (!line.equals("#1.2")) {
            throw new IOException("Invalid version.");
        }

        // Second line: "<rows>\t<columns>".
        line = reader.readLine();
        if (line == null) {
            throw new IOException("Premature end of file.");
        }
        // limit -1 keeps trailing empty fields so column counts stay exact.
        String[] tokens = line.split("\t", -1);
        if (tokens.length != 2) {
            throw new IOException("Invalid data size inforamation.");
        }
        int n = Integer.parseInt(tokens[0]);
        int p = Integer.parseInt(tokens[1]);
        if (n <= 0 || p <= 0) {
            throw new IOException(String.format("Invalid data size %d x %d.", n, p));
        }

        // Third line: title header; the first two fields label the name/description columns.
        Attribute[] attributes = new Attribute[p];
        line = reader.readLine();
        if (line == null) {
            throw new IOException("Premature end of file.");
        }
        tokens = line.split("\t", -1);
        if (tokens.length != p + 2) {
            throw new IOException("Invalid title header.");
        }
        for (int i = 0; i < p; i++) {
            attributes[i] = new NumericAttribute(tokens[i + 2]);
        }

        AttributeDataset data = new AttributeDataset(name, attributes);
        for (int i = 0; i < n; i++) {
            line = reader.readLine();
            if (line == null) {
                throw new IOException("Premature end of file.");
            }
            tokens = line.split("\t", -1);
            if (tokens.length != p + 2) {
                // Data rows start on the 4th line of the file, hence i + 4.
                throw new IOException(String.format("Invalid number of elements of line %d: %d", i + 4, tokens.length));
            }

            double[] x = new double[p];
            for (int j = 0; j < p; j++) {
                String value = tokens[j + 2];
                // An empty field denotes a missing value.
                x[j] = value.isEmpty() ? Double.NaN : Double.parseDouble(value);
            }

            Datum<double[]> datum = new Datum<>(x);
            datum.name = tokens[0];
            datum.description = tokens[1];
            data.add(datum);
        }
        return data;
    }
}
use of smile.data.Attribute in project smile by haifengl.
the class PCLParser method parse.
/**
 * Parse a PCL dataset from an input stream.
 *
 * <p>The expected layout, as enforced by this method, is:
 * <ol>
 * <li>a tab-separated header line with at least three fields; every field
 *     after the third becomes the name of a numeric attribute;</li>
 * <li>a tab-separated weight line with the same number of fields as the
 *     header; every field after the third is parsed as the weight of the
 *     corresponding attribute;</li>
 * <li>any number of tab-separated data lines with the same number of fields:
 *     the row name, the row description, the row weight, and then one value
 *     per attribute. An empty value is stored as {@code NaN}.</li>
 * </ol>
 *
 * <p>The reader wrapping {@code stream} is closed when this method returns,
 * whether normally or by throwing.
 *
 * @param name the name of dataset.
 * @param stream the input stream of data.
 * @return the parsed dataset.
 * @throws java.io.IOException if the stream cannot be read, ends prematurely,
 *         or does not follow the layout described above.
 * @throws NumberFormatException if a weight or data field is not a valid number.
 */
public AttributeDataset parse(String name, InputStream stream) throws IOException, ParseException {
    // try-with-resources: the reader used to be closed only on the success path,
    // leaking the stream whenever one of the validation checks below threw.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream))) {
        String line = reader.readLine();
        if (line == null) {
            throw new IOException("Empty data source.");
        }

        // limit -1 keeps trailing empty fields so column counts stay exact.
        String[] tokens = line.split("\t", -1);
        if (tokens.length < 3) {
            // Without this check a short header produced a negative array size below.
            throw new IOException(String.format("Invalid header: expected at least 3 columns but found %d.", tokens.length));
        }
        int p = tokens.length - 3;

        line = reader.readLine();
        if (line == null) {
            throw new IOException("Premature end of file.");
        }
        String[] weight = line.split("\t", -1);
        if (weight.length != tokens.length) {
            throw new IOException("Invalid sample weight header.");
        }

        Attribute[] attributes = new Attribute[p];
        for (int i = 0; i < p; i++) {
            attributes[i] = new NumericAttribute(tokens[i + 3], null, Double.valueOf(weight[i + 3]));
        }

        AttributeDataset data = new AttributeDataset(name, attributes);
        // i tracks the 1-based line number for error messages; data starts on line 3.
        for (int i = 3; (line = reader.readLine()) != null; i++) {
            tokens = line.split("\t", -1);
            if (tokens.length != weight.length) {
                throw new IOException(String.format("Invalid number of elements of line %d: %d", i, tokens.length));
            }

            double[] x = new double[p];
            for (int j = 0; j < p; j++) {
                String value = tokens[j + 3];
                // An empty field denotes a missing value.
                x[j] = value.isEmpty() ? Double.NaN : Double.parseDouble(value);
            }

            Datum<double[]> datum = new Datum<>(x);
            datum.name = tokens[0];
            datum.description = tokens[1];
            datum.weight = Double.valueOf(tokens[2]);
            data.add(datum);
        }
        return data;
    }
}
use of smile.data.Attribute in project smile by haifengl.
the class TXTParser method parse.
/**
 * Parse a TXT dataset from an input stream.
 *
 * <p>The expected layout, as enforced by this method, is:
 * <ol>
 * <li>a tab-separated header line. The first field labels the row-name
 *     column. If the second field equals {@code description} (ignoring case)
 *     it labels a row-description column. All remaining fields become the
 *     names of numeric attributes;</li>
 * <li>any number of tab-separated data lines with the same number of fields
 *     as the header: the row name, the optional row description, and then
 *     one value per attribute. An empty value is stored as {@code NaN}.</li>
 * </ol>
 *
 * <p>The reader wrapping {@code stream} is closed when this method returns,
 * whether normally or by throwing.
 *
 * @param name the name of dataset.
 * @param stream the input stream of data.
 * @return the parsed dataset.
 * @throws java.io.IOException if the stream cannot be read, is empty, or a
 *         data line has a different number of fields than the header.
 * @throws NumberFormatException if a data field is not a valid number.
 */
public AttributeDataset parse(String name, InputStream stream) throws IOException, ParseException {
    // try-with-resources: the reader used to be closed only on the success path,
    // leaking the stream whenever a malformed line made the method throw.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream))) {
        String line = reader.readLine();
        if (line == null) {
            throw new IOException("Empty data source.");
        }

        // limit -1 keeps trailing empty fields so column counts stay exact.
        String[] tokens = line.split("\t", -1);

        // Index of the first data column: 1 without, 2 with a description column.
        int start = 1;
        // The length check guards against a single-column (or blank) header,
        // which previously failed with ArrayIndexOutOfBoundsException.
        if (tokens.length > 1 && tokens[1].equalsIgnoreCase("description")) {
            start = 2;
        }
        int p = tokens.length - start;

        Attribute[] attributes = new Attribute[p];
        for (int i = 0; i < p; i++) {
            attributes[i] = new NumericAttribute(tokens[i + start]);
        }

        AttributeDataset data = new AttributeDataset(name, attributes);
        // i tracks the 1-based line number for error messages; data starts on line 2.
        for (int i = 2; (line = reader.readLine()) != null; i++) {
            tokens = line.split("\t", -1);
            if (tokens.length != p + start) {
                throw new IOException(String.format("Invalid number of elements of line %d: %d", i, tokens.length));
            }

            double[] x = new double[p];
            for (int j = 0; j < p; j++) {
                String value = tokens[j + start];
                // An empty field denotes a missing value.
                x[j] = value.isEmpty() ? Double.NaN : Double.parseDouble(value);
            }

            Datum<double[]> datum = new Datum<>(x);
            datum.name = tokens[0];
            if (start == 2) {
                datum.description = tokens[1];
            }
            data.add(datum);
        }
        return data;
    }
}
use of smile.data.Attribute in project smile by haifengl.
the class ArffParserTest method testParseWeather.
/**
 * Test of parse method, of class ArffParser.
 *
 * <p>Parses the {@code weka/weather.nominal.arff} test data file with the
 * response set to column index 4 and checks the attribute types, the dataset
 * dimensions, and selected rows. Exceptions raised while loading or parsing
 * the file are deliberately NOT caught here: they propagate to the test
 * runner so that a broken parser makes the test fail instead of passing
 * silently.
 */
@Test
public void testParseWeather() throws Exception {
    System.out.println("weather");
    ArffParser arffParser = new ArffParser();
    arffParser.setResponseIndex(4);
    AttributeDataset weather = arffParser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/weather.nominal.arff"));

    double[][] x = weather.toArray(new double[weather.size()][]);
    int[] y = weather.toArray(new int[weather.size()]);

    // The response and every attribute are expected to be nominal.
    assertEquals(Attribute.Type.NOMINAL, weather.response().getType());
    for (Attribute attribute : weather.attributes()) {
        assertEquals(Attribute.Type.NOMINAL, attribute.getType());
    }

    // Dataset dimensions.
    assertEquals(14, weather.size());
    assertEquals(4, weather.attributes().length);

    // Responses of the first three rows.
    assertEquals("no", weather.response().toString(y[0]));
    assertEquals("no", weather.response().toString(y[1]));
    assertEquals("yes", weather.response().toString(y[2]));

    // Attribute values of the first row.
    assertEquals("sunny", weather.attributes()[0].toString(x[0][0]));
    assertEquals("hot", weather.attributes()[1].toString(x[0][1]));
    assertEquals("high", weather.attributes()[2].toString(x[0][2]));
    assertEquals("FALSE", weather.attributes()[3].toString(x[0][3]));

    // Response and attribute values of the last row.
    assertEquals("no", weather.response().toString(y[13]));
    assertEquals("rainy", weather.attributes()[0].toString(x[13][0]));
    assertEquals("mild", weather.attributes()[1].toString(x[13][1]));
    assertEquals("high", weather.attributes()[2].toString(x[13][2]));
    assertEquals("TRUE", weather.attributes()[3].toString(x[13][3]));
}
Aggregations