use of smile.data.Attribute in project smile by haifengl.
the class ArffParserTest method testParseIris.
/**
 * Test of parse method, of class ArffParser, using the Weka iris dataset.
 * <p>
 * Any exception raised while locating or parsing the file propagates out of
 * the test (the method declares {@code throws Exception}) so that the test
 * fails instead of silently passing without having checked anything.
 */
@Test
public void testParseIris() throws Exception {
    System.out.println("iris");

    ArffParser arffParser = new ArffParser();
    // Column 4 of iris.arff is used as the response.
    arffParser.setResponseIndex(4);
    AttributeDataset iris = arffParser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/iris.arff"));
    double[][] x = iris.toArray(new double[iris.size()][]);
    int[] y = iris.toArray(new int[iris.size()]);

    // Attribute metadata: nominal response, numeric predictors.
    assertEquals(Attribute.Type.NOMINAL, iris.response().getType());
    for (Attribute attribute : iris.attributes()) {
        assertEquals(Attribute.Type.NUMERIC, attribute.getType());
    }

    assertEquals(150, iris.size());
    assertEquals(4, iris.attributes().length);

    // First rows.
    assertEquals("Iris-setosa", iris.response().toString(y[0]));
    assertEquals("Iris-setosa", iris.response().toString(y[1]));
    assertEquals("Iris-setosa", iris.response().toString(y[2]));
    assertEquals(5.1, x[0][0], 1E-7);
    assertEquals(3.5, x[0][1], 1E-7);
    assertEquals(1.4, x[0][2], 1E-7);
    assertEquals(0.2, x[0][3], 1E-7);

    // Last row.
    assertEquals("Iris-virginica", iris.response().toString(y[149]));
    assertEquals(5.9, x[149][0], 1E-7);
    assertEquals(3.0, x[149][1], 1E-7);
    assertEquals(5.1, x[149][2], 1E-7);
    assertEquals(1.8, x[149][3], 1E-7);
}
use of smile.data.Attribute in project smile by haifengl.
the class ArffParser method getAttributes.
/**
 * Reads only the header of an ARFF stream and returns the attributes
 * declared in it.
 * <p>
 * This method does not itself close {@code stream}.
 *
 * @param stream the input stream positioned at the start of the ARFF content.
 * @return the attributes collected by the header reader, in list order.
 * @throws IOException declared by the signature for I/O failures.
 * @throws ParseException declared by the signature for malformed headers.
 */
public static Attribute[] getAttributes(InputStream stream) throws IOException, ParseException {
    StreamTokenizer tokenizer = new StreamTokenizer(new BufferedReader(new InputStreamReader(stream)));

    // A throwaway parser instance provides the tokenizer setup and header reader.
    ArffParser headerParser = new ArffParser();
    headerParser.initTokenizer(tokenizer);

    List<Attribute> declared = new ArrayList<>();
    headerParser.readHeader(tokenizer, declared);

    Attribute[] result = new Attribute[declared.size()];
    return declared.toArray(result);
}
use of smile.data.Attribute in project smile by haifengl.
the class ArffParser method parse.
/**
 * Parses an ARFF dataset from the given stream.
 * <p>
 * The header is read first; if {@code responseIndex} addresses one of the
 * declared attributes, that attribute is taken out of the predictor list and
 * used as the dataset's response. Every following record is then read, in
 * either sparse (starting with <code>{</code>) or dense form. The reader
 * wrapping {@code stream} is closed when this method exits.
 *
 * @param stream the input stream of ARFF content.
 * @return the parsed dataset.
 * @throws IOException if no attribute is declared in the header, or on I/O failure.
 * @throws ParseException declared by the signature for malformed content.
 */
public AttributeDataset parse(InputStream stream) throws IOException, ParseException {
    try (Reader reader = new BufferedReader(new InputStreamReader(stream))) {
        StreamTokenizer tokenizer = new StreamTokenizer(reader);
        initTokenizer(tokenizer);

        List<Attribute> attributes = new ArrayList<>();
        String relationName = readHeader(tokenizer, attributes);
        if (attributes.isEmpty()) {
            throw new IOException("no header information available");
        }

        // Snapshot of every declared column (response included); the instance
        // readers receive this full array. Must be taken before the removal below.
        Attribute[] allColumns = attributes.toArray(new Attribute[attributes.size()]);

        // Pull the response out of the predictor list when the index is in range.
        Attribute response = null;
        final int target = responseIndex;
        if (target >= 0 && target < attributes.size()) {
            response = attributes.remove(target);
        }

        Attribute[] predictors = attributes.toArray(new Attribute[attributes.size()]);
        AttributeDataset data = new AttributeDataset(relationName, predictors, response);

        // Read records until the tokenizer reports end of file.
        getFirstToken(tokenizer);
        while (tokenizer.ttype != StreamTokenizer.TT_EOF) {
            if (tokenizer.ttype == '{') {
                data.add(getSparseInstance(tokenizer, allColumns));
            } else {
                data.add(getInstance(tokenizer, allColumns));
            }
            getFirstToken(tokenizer);
        }

        // Mark nominal/string predictors as no longer open.
        // NOTE(review): the response was removed from this list above, so it is
        // not passed to setOpen(false) here - confirm that this is intended.
        for (Attribute remaining : attributes) {
            if (remaining instanceof NominalAttribute) {
                ((NominalAttribute) remaining).setOpen(false);
            }
            if (remaining instanceof StringAttribute) {
                ((StringAttribute) remaining).setOpen(false);
            }
        }

        return data;
    }
}
use of smile.data.Attribute in project smile by haifengl.
the class LibsvmParser method parse.
/**
 * Parse a libsvm sparse dataset from an input stream.
 * <p>
 * Each non-blank line has the form {@code label index:value index:value ...}
 * with 1-based feature indices. The first non-blank line decides the task:
 * if its label parses as an integer the data is treated as classification,
 * otherwise, if it parses as a double, as regression. Blank lines (for
 * example a trailing empty line) are skipped. For classification the
 * distinct labels are sorted and every row's label is replaced by its
 * position in that sorted list. The reader wrapping {@code stream} is closed
 * when this method exits.
 *
 * @param name the name of dataset.
 * @param stream the input stream of data.
 * @return the parsed sparse dataset.
 * @throws java.io.IOException if the source contains no data line or an I/O error occurs.
 * @throws NumberFormatException if a label or an {@code index:value} pair is malformed.
 * @throws ParseException declared by the signature; not thrown directly by the code in this method.
 */
public SparseDataset parse(String name, InputStream stream) throws IOException, ParseException {
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream))) {
        // Find the first line that actually carries data.
        String line = reader.readLine();
        while (line != null && line.trim().isEmpty()) {
            line = reader.readLine();
        }

        if (line == null) {
            throw new IOException("Empty data source.");
        }

        String[] tokens = line.trim().split("\\s+");
        boolean classification = true;
        Attribute response = null;
        try {
            Integer.parseInt(tokens[0]);
            response = new NominalAttribute("class");
        } catch (NumberFormatException e) {
            try {
                Double.parseDouble(tokens[0]);
                // NOTE(review): a real-valued response is still described by a
                // NominalAttribute here - confirm whether a numeric attribute
                // type was intended.
                response = new NominalAttribute("response");
                classification = false;
            } catch (NumberFormatException ex) {
                logger.error("Failed to parse {}", tokens[0], ex);
                NumberFormatException failure =
                        new NumberFormatException("Unrecognized response variable value: " + tokens[0]);
                failure.initCause(ex);
                throw failure;
            }
        }

        SparseDataset sparse = new SparseDataset(name, response);

        // i is the row index; it advances only for lines that carry data.
        int i = 0;
        while (line != null) {
            String content = line.trim();
            if (!content.isEmpty()) {
                tokens = content.split("\\s+");

                if (classification) {
                    int y = Integer.parseInt(tokens[0]);
                    sparse.set(i, y);
                } else {
                    double y = Double.parseDouble(tokens[0]);
                    sparse.set(i, y);
                }

                for (int k = 1; k < tokens.length; k++) {
                    String[] pair = tokens[k].split(":");
                    if (pair.length != 2) {
                        throw new NumberFormatException("Invalid data: " + tokens[k]);
                    }

                    // Feature indices in the file are 1-based.
                    int j = Integer.parseInt(pair[0]) - 1;
                    double x = Double.parseDouble(pair[1]);
                    sparse.set(i, j, x);
                }

                i++;
            }

            line = reader.readLine();
        }

        if (classification) {
            int n = sparse.size();
            int[] y = sparse.toArray(new int[n]);
            int[] label = Math.unique(y);
            Arrays.sort(label);

            // Register the labels with the response attribute in sorted order.
            for (int c : label) {
                response.valueOf(String.valueOf(c));
            }

            // Replace each raw label by its rank among the sorted distinct labels.
            for (int r = 0; r < n; r++) {
                sparse.get(r).y = Arrays.binarySearch(label, y[r]);
            }
        }

        return sparse;
    }
}
use of smile.data.Attribute in project smile by haifengl.
the class SammonMappingDemo method learn.
/**
 * Runs Sammon's mapping on the currently selected dataset, once with 2 and
 * once with 3 output dimensions, and returns a panel showing the two
 * resulting scatter plots side by side.
 * <p>
 * Points are labelled with the dataset's string labels; when the first label
 * is {@code null}, the attribute names are used as labels instead. The time
 * taken by each run is printed to standard output.
 *
 * @return a panel with a 1x2 grid holding the 2-dimensional and the
 *         3-dimensional plot.
 */
public JComponent learn() {
    JPanel panel = new JPanel(new GridLayout(1, 2));

    int rows = dataset[datasetIndex].size();
    double[][] data = dataset[datasetIndex].toArray(new double[rows][]);
    String[] labels = dataset[datasetIndex].toArray(new String[rows]);

    // Fall back to the attribute names when no row labels are available.
    if (labels[0] == null) {
        Attribute[] columns = dataset[datasetIndex].attributes();
        labels = new String[columns.length];
        int c = 0;
        for (Attribute column : columns) {
            labels[c++] = column.getName();
        }
    }

    // Two-dimensional embedding.
    long start = System.currentTimeMillis();
    SammonMapping planar = new SammonMapping(data, 2);
    long elapsed = System.currentTimeMillis() - start;
    System.out.format("Learn Sammon's Mapping (k=2) from %d samples in %dms\n", data.length, elapsed);

    PlotCanvas planarPlot = ScatterPlot.plot(planar.getCoordinates(), labels);
    planarPlot.setTitle("Sammon's Mapping (k = 2)");
    panel.add(planarPlot);

    // Three-dimensional embedding.
    start = System.currentTimeMillis();
    SammonMapping spatial = new SammonMapping(data, 3);
    elapsed = System.currentTimeMillis() - start;
    System.out.format("Learn Sammon's Mapping (k=3) from %d samples in %dms\n", data.length, elapsed);

    PlotCanvas spatialPlot = ScatterPlot.plot(spatial.getCoordinates(), labels);
    spatialPlot.setTitle("Sammon's Mapping (k = 3)");
    panel.add(spatialPlot);

    return panel;
}
Aggregations