use of smile.data.SparseDataset in project smile by haifengl.
the class LibsvmParserTest method testParseNG20.
/**
 * Test of parse method, of class LibsvmParser, using the news20
 * training ("libsvm/news20.dat") and test ("libsvm/news20.t.dat") files.
 * <p>
 * Checks the dataset sizes, a handful of feature values in the first and
 * last training rows, and a few class labels. Exceptions raised while
 * locating or parsing the data files are not caught here: they propagate
 * to the test runner so that a missing file or a parse error fails the
 * test instead of being printed and ignored.
 *
 * @throws Exception if a data file cannot be opened or parsed.
 */
@Test
public void testParseNG20() throws Exception {
    System.out.println("NG20");
    LibsvmParser parser = new LibsvmParser();

    SparseDataset train = parser.parse("NG20 Train", smile.data.parser.IOUtils.getTestDataFile("libsvm/news20.dat"));
    SparseDataset test = parser.parse("NG20 Test", smile.data.parser.IOUtils.getTestDataFile("libsvm/news20.t.dat"));

    int[] y = train.toArray(new int[train.size()]);
    int[] testy = test.toArray(new int[test.size()]);

    // Assertions use (expected, actual) order so failure messages read correctly.
    assertEquals(15935, train.size());

    // First training row.
    assertEquals(0, y[0]);
    assertEquals(0.0, train.get(0, 0), 1E-7);
    assertEquals(0.0, train.get(0, 1), 1E-7);
    assertEquals(2.0, train.get(0, 196), 1E-7);
    assertEquals(3.0, train.get(0, 320), 1E-7);
    assertEquals(0.0, train.get(0, 20504), 1E-7);
    assertEquals(1.0, train.get(0, 20505), 1E-7);
    assertEquals(1.0, train.get(0, 20506), 1E-7);
    assertEquals(0.0, train.get(0, 20507), 1E-7);

    // Last training row.
    int last = y.length - 1;
    assertEquals(16, y[last]);
    assertEquals(1.0, train.get(last, 0), 1E-7);
    assertEquals(0.0, train.get(last, 1), 1E-7);
    assertEquals(1.0, train.get(last, 9), 1E-7);
    assertEquals(0.0, train.get(last, 10), 1E-7);
    assertEquals(0.0, train.get(last, 57796), 1E-7);
    assertEquals(1.0, train.get(last, 57797), 1E-7);
    assertEquals(0.0, train.get(last, 57798), 1E-7);

    // Test set size and a few labels.
    assertEquals(3993, test.size());
    assertEquals(1, testy[0]);
    assertEquals(17, testy[testy.length - 3]);
    assertEquals(18, testy[testy.length - 2]);
    assertEquals(16, testy[testy.length - 1]);
}
use of smile.data.SparseDataset in project smile by haifengl.
the class LibsvmParserTest method testParseGlass.
/**
 * Test of parse method, of class LibsvmParser, using "libsvm/glass.txt".
 * <p>
 * Checks the number of rows, the width of the dense array returned by
 * {@code toArray()}, and the label and all nine feature values of the
 * first and last rows. Exceptions raised while locating or parsing the
 * data file propagate to the test runner so that such a failure fails the
 * test instead of being printed and ignored.
 *
 * @throws Exception if the data file cannot be opened or parsed.
 */
@Test
public void testParseGlass() throws Exception {
    System.out.println("glass");
    LibsvmParser parser = new LibsvmParser();

    SparseDataset train = parser.parse("Glass", smile.data.parser.IOUtils.getTestDataFile("libsvm/glass.txt"));
    double[][] x = train.toArray();
    int[] y = train.toArray(new int[train.size()]);

    assertEquals(214, train.size());
    assertEquals(9, x[0].length);

    // First row.
    assertEquals(0, y[0]);
    assertEquals(-0.134323, x[0][0], 1E-7);
    assertEquals(-0.124812, x[0][1], 1E-7);
    assertEquals(1, x[0][2], 1E-7);
    assertEquals(-0.495327, x[0][3], 1E-7);
    assertEquals(-0.296429, x[0][4], 1E-7);
    assertEquals(-0.980676, x[0][5], 1E-7);
    assertEquals(-0.3829, x[0][6], 1E-7);
    assertEquals(-1, x[0][7], 1E-7);
    assertEquals(-1, x[0][8], 1E-7);

    // Last row.
    assertEquals(5, y[213]);
    assertEquals(-0.476734, x[213][0], 1E-7);
    assertEquals(0.0526316, x[213][1], 1E-7);
    assertEquals(-1, x[213][2], 1E-7);
    assertEquals(0.115265, x[213][3], 1E-7);
    assertEquals(0.267857, x[213][4], 1E-7);
    assertEquals(-1, x[213][5], 1E-7);
    assertEquals(-0.407063, x[213][6], 1E-7);
    assertEquals(0.0603174, x[213][7], 1E-7);
    assertEquals(-1, x[213][8], 1E-7);
}
use of smile.data.SparseDataset in project smile by haifengl.
the class LibsvmParser method parse.
/**
 * Parse a libsvm sparse dataset from an input stream.
 * <p>
 * Every non-blank line is split on whitespace into a response value
 * followed by {@code index:value} pairs. The response of the first
 * non-blank line decides how all responses are read: if it parses as an
 * integer, every response is read as an integer class label; otherwise
 * every response is read as a double. The column used for a pair is the
 * index found in the file minus one.
 * <p>
 * For integer class labels, each row's label is finally replaced by the
 * position of the original label in the sorted array of distinct labels.
 * <p>
 * Blank or whitespace-only lines are skipped and do not consume a row
 * index. The reader wrapping {@code stream} is closed before this method
 * returns, whether it succeeds or fails.
 *
 * @param name the name of dataset.
 * @param stream the input stream of data.
 * @return the parsed sparse dataset.
 * @throws java.io.IOException if reading fails or the stream contains no
 *         non-blank line.
 * @throws ParseException declared in the signature; this method body does
 *         not throw it directly.
 * @throws NumberFormatException if a response value, a feature index or a
 *         feature value cannot be parsed, or a feature token is not of the
 *         form {@code index:value}.
 */
public SparseDataset parse(String name, InputStream stream) throws IOException, ParseException {
    BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
    try {
        // Skip leading blank lines so that the response type is detected
        // from the first line that actually carries data.
        String line = reader.readLine();
        while (line != null && line.trim().isEmpty()) {
            line = reader.readLine();
        }

        if (line == null) {
            throw new IOException("Empty data source.");
        }

        String[] tokens = line.trim().split("\\s+");
        boolean classification = true;
        Attribute response = null;
        try {
            Integer.valueOf(tokens[0]);
            response = new NominalAttribute("class");
        } catch (NumberFormatException e) {
            try {
                Double.valueOf(tokens[0]);
                // NOTE(review): a nominal attribute is created even though the
                // response is real-valued here; confirm whether a numeric
                // attribute type was intended.
                response = new NominalAttribute("response");
                classification = false;
            } catch (NumberFormatException ex) {
                logger.error("Failed to parse {}", tokens[0], ex);
                throw new NumberFormatException("Unrecognized response variable value: " + tokens[0]);
            }
        }

        SparseDataset sparse = new SparseDataset(name, response);

        // i is the row index; it only advances for lines that hold data.
        int i = 0;
        while (line != null) {
            String trimmed = line.trim();
            if (!trimmed.isEmpty()) {
                tokens = trimmed.split("\\s+");

                if (classification) {
                    int y = Integer.parseInt(tokens[0]);
                    sparse.set(i, y);
                } else {
                    double y = Double.parseDouble(tokens[0]);
                    sparse.set(i, y);
                }

                for (int k = 1; k < tokens.length; k++) {
                    String[] pair = tokens[k].split(":");
                    if (pair.length != 2) {
                        throw new NumberFormatException("Invalid data: " + tokens[k]);
                    }

                    // Shift the index read from the file down by one.
                    int j = Integer.parseInt(pair[0]) - 1;
                    double x = Double.parseDouble(pair[1]);
                    sparse.set(i, j, x);
                }

                i++;
            }

            line = reader.readLine();
        }

        if (classification) {
            int n = sparse.size();
            int[] y = sparse.toArray(new int[n]);
            int[] label = Math.unique(y);
            Arrays.sort(label);

            // Presumably registers each distinct label with the response
            // attribute, in sorted order; verify against Attribute.valueOf.
            for (int c : label) {
                response.valueOf(String.valueOf(c));
            }

            // Replace each label by its position in the sorted distinct labels.
            for (int r = 0; r < n; r++) {
                sparse.get(r).y = Arrays.binarySearch(label, y[r]);
            }
        }

        return sparse;
    } finally {
        reader.close();
    }
}
use of smile.data.SparseDataset in project smile by haifengl.
the class SIBTest method testParseNG20.
/**
 * Test of SIB clustering and prediction on the news20 training
 * ("libsvm/news20.dat") and test ("libsvm/news20.t.dat") files.
 * <p>
 * A SIB model is built from the training set; its cluster labels are
 * compared with the parsed class labels using the Rand index and the
 * adjusted Rand index. The same two measures are then computed for the
 * model's predictions on the test set. Both runs must exceed 0.85 (Rand
 * index) and 0.2 (adjusted Rand index).
 * <p>
 * Exceptions raised while loading the data or running the model propagate
 * to the test runner so that such a failure fails the test instead of
 * being printed and ignored.
 *
 * @throws Exception if a data file cannot be opened or parsed.
 */
@Test
public void testParseNG20() throws Exception {
    System.out.println("NG20");
    LibsvmParser parser = new LibsvmParser();

    SparseDataset train = parser.parse("NG20 Train", smile.data.parser.IOUtils.getTestDataFile("libsvm/news20.dat"));
    SparseDataset test = parser.parse("NG20 Test", smile.data.parser.IOUtils.getTestDataFile("libsvm/news20.t.dat"));

    int[] y = train.toArray(new int[train.size()]);
    int[] testy = test.toArray(new int[test.size()]);

    SIB sib = new SIB(train, 20, 100, 8);
    System.out.println(sib);

    AdjustedRandIndex ari = new AdjustedRandIndex();
    RandIndex rand = new RandIndex();

    // Agreement between the training cluster labels and the class labels.
    double r = rand.measure(y, sib.getClusterLabel());
    double r2 = ari.measure(y, sib.getClusterLabel());
    System.out.format("Training rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2);
    assertTrue(r > 0.85);
    assertTrue(r2 > 0.2);

    // Agreement between the predicted clusters of the test rows and their class labels.
    int[] p = new int[test.size()];
    for (int i = 0; i < test.size(); i++) {
        p[i] = sib.predict(test.get(i).x);
    }

    r = rand.measure(testy, p);
    r2 = ari.measure(testy, p);
    System.out.format("Testing rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2);
    assertTrue(r > 0.85);
    assertTrue(r2 > 0.2);
}
use of smile.data.SparseDataset in project smile by haifengl.
the class SparseDatasetParserTest method testParse.
/**
 * Test of parse method, of class SparseDatasetParser, using "text/kos.txt".
 * <p>
 * Checks the values reported by {@code size()}, {@code ncols()} and
 * {@code length()}, plus four individual cell values. Exceptions raised
 * while locating or parsing the data file propagate to the test runner so
 * that such a failure fails the test instead of being printed and ignored.
 *
 * @throws Exception if the data file cannot be opened or parsed.
 */
@Test
public void testParse() throws Exception {
    System.out.println("parse");

    SparseDatasetParser parser = new SparseDatasetParser(1);
    SparseDataset data = parser.parse(smile.data.parser.IOUtils.getTestDataFile("text/kos.txt"));

    assertEquals(3430, data.size());
    assertEquals(6906, data.ncols());
    assertEquals(353160, data.length());

    assertEquals(2.0, data.get(0, 60), 1E-7);
    assertEquals(1.0, data.get(1, 1062), 1E-7);
    assertEquals(0.0, data.get(1, 1063), 1E-7);
    assertEquals(1.0, data.get(3429, 6821), 1E-7);
}
Aggregations