Use of smile.data.AttributeDataset in project smile by haifengl.
Class GrowingNeuralGasTest, method testUSPS.
/**
 * Test of learn method, of class GrowingNeuralGas.
 */
@Test
public void testUSPS() {
    System.out.println("USPS");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));

        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);
        double[][] testx = test.toArray(new double[test.size()][]);
        int[] testy = test.toArray(new int[test.size()]);

        GrowingNeuralGas gng = new GrowingNeuralGas(x[0].length);
        for (int i = 0; i < 10; i++) {
            int[] index = Math.permutate(x.length);
            for (int j = 0; j < x.length; j++) {
                gng.update(x[index[j]]);
            }
        }
        gng.partition(10);

        AdjustedRandIndex ari = new AdjustedRandIndex();
        RandIndex rand = new RandIndex();

        int[] p = new int[x.length];
        for (int i = 0; i < x.length; i++) {
            p[i] = gng.predict(x[i]);
        }

        double r = rand.measure(y, p);
        double r2 = ari.measure(y, p);
        System.out.format("Training rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2);
        assertTrue(r > 0.85);
        assertTrue(r2 > 0.40);

        p = new int[testx.length];
        for (int i = 0; i < testx.length; i++) {
            p[i] = gng.predict(testx[i]);
        }

        r = rand.measure(testy, p);
        r2 = ari.measure(testy, p);
        System.out.format("Testing rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2);
        assertTrue(r > 0.85);
        assertTrue(r2 > 0.40);
    } catch (Exception ex) {
        System.err.println(ex);
    }
}
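The NeuralMap and SOM tests below follow the same AttributeDataset pattern: parse a delimited file with a nominal response column, then convert the dataset to primitive arrays. The standalone sketch here distills that pattern for reference; the imports and the example class name are assumptions (not part of the original tests), while the parser calls, file path, and toArray conversions are taken verbatim from the tests.

// Hypothetical standalone example of the AttributeDataset usage shared by the tests above.
// Assumes the Smile 1.x packages smile.data and smile.data.parser.
import smile.data.AttributeDataset;
import smile.data.NominalAttribute;
import smile.data.parser.DelimitedTextParser;

public class AttributeDatasetExample {
    public static void main(String[] args) throws Exception {
        DelimitedTextParser parser = new DelimitedTextParser();
        // Column 0 carries the digit label; register it as the nominal response.
        parser.setResponseIndex(new NominalAttribute("class"), 0);

        AttributeDataset train = parser.parse("USPS Train",
                smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));

        // Convert to plain arrays, as the clustering tests do.
        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);

        System.out.println(x.length + " samples, " + x[0].length + " features, first label = " + y[0]);
    }
}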
Use of smile.data.AttributeDataset in project smile by haifengl.
Class NeuralMapTest, method testUSPS.
/**
 * Test of learn method, of class NeuralMap.
 */
@Test
public void testUSPS() {
    System.out.println("USPS");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));

        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);
        double[][] testx = test.toArray(new double[test.size()][]);
        int[] testy = test.toArray(new int[test.size()]);

        NeuralMap cortex = new NeuralMap(x[0].length, 8.0, 0.05, 0.0006, 5, 3);
        for (int i = 0; i < 5; i++) {
            for (double[] xi : x) {
                cortex.update(xi);
            }
        }
        cortex.purge(16);
        cortex.partition(10);

        AdjustedRandIndex ari = new AdjustedRandIndex();
        RandIndex rand = new RandIndex();

        int[] p = new int[x.length];
        for (int i = 0; i < x.length; i++) {
            p[i] = cortex.predict(x[i]);
        }

        double r = rand.measure(y, p);
        double r2 = ari.measure(y, p);
        System.out.format("Training rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2);
        //assertTrue(r > 0.65);
        //assertTrue(r2 > 0.18);

        p = new int[testx.length];
        for (int i = 0; i < testx.length; i++) {
            p[i] = cortex.predict(testx[i]);
        }

        r = rand.measure(testy, p);
        r2 = ari.measure(testy, p);
        System.out.format("Testing rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2);
        //assertTrue(r > 0.65);
        //assertTrue(r2 > 0.18);
    } catch (Exception ex) {
        System.err.println(ex);
    }
}
Use of smile.data.AttributeDataset in project smile by haifengl.
Class SOMTest, method testUSPS.
/**
 * Test of learn method, of class SOM.
 */
@Test
public void testUSPS() {
    System.out.println("USPS");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));

        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);
        double[][] testx = test.toArray(new double[test.size()][]);
        int[] testy = test.toArray(new int[test.size()]);

        SOM som = new SOM(x, 10, 10);
        int[] label = som.partition(10);

        AdjustedRandIndex ari = new AdjustedRandIndex();
        RandIndex rand = new RandIndex();

        double r = rand.measure(y, label);
        double r2 = ari.measure(y, label);
        System.out.format("Training rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2);
        assertTrue(r > 0.88);
        assertTrue(r2 > 0.45);

        int[] p = new int[testx.length];
        for (int i = 0; i < testx.length; i++) {
            p[i] = som.predict(testx[i]);
        }

        r = rand.measure(testy, p);
        r2 = ari.measure(testy, p);
        System.out.format("Testing rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2);
        assertTrue(r > 0.88);
        assertTrue(r2 > 0.45);
    } catch (Exception ex) {
        System.err.println(ex);
    }
}
Use of smile.data.AttributeDataset in project smile by haifengl.
Class GCTParser, method parse.
/**
 * Parse a GCT dataset from an input stream.
 * @param name the name of the dataset.
 * @param stream the input stream of data.
 * @return the parsed dataset.
 * @throws java.io.IOException if the stream cannot be read or the file is not a valid GCT file.
 */
public AttributeDataset parse(String name, InputStream stream) throws IOException, ParseException {
    BufferedReader reader = new BufferedReader(new InputStreamReader(stream));

    String line = reader.readLine();
    if (line == null) {
        throw new IOException("Empty data source.");
    }

    if (!line.equals("#1.2")) {
        throw new IOException("Invalid version.");
    }

    line = reader.readLine();
    if (line == null) {
        throw new IOException("Premature end of file.");
    }

    String[] tokens = line.split("\t", -1);
    if (tokens.length != 2) {
        throw new IOException("Invalid data size information.");
    }

    int n = Integer.parseInt(tokens[0]);
    int p = Integer.parseInt(tokens[1]);
    if (n <= 0 || p <= 0) {
        throw new IOException(String.format("Invalid data size %d x %d.", n, p));
    }

    Attribute[] attributes = new Attribute[p];
    line = reader.readLine();
    if (line == null) {
        throw new IOException("Premature end of file.");
    }

    tokens = line.split("\t", -1);
    if (tokens.length != p + 2) {
        throw new IOException("Invalid title header.");
    }

    for (int i = 0; i < p; i++) {
        attributes[i] = new NumericAttribute(tokens[i + 2]);
    }

    AttributeDataset data = new AttributeDataset(name, attributes);
    for (int i = 0; i < n; i++) {
        line = reader.readLine();
        if (line == null) {
            throw new IOException("Premature end of file.");
        }

        tokens = line.split("\t", -1);
        if (tokens.length != p + 2) {
            throw new IOException(String.format("Invalid number of elements of line %d: %d", i + 4, tokens.length));
        }

        double[] x = new double[p];
        for (int j = 0; j < p; j++) {
            if (tokens[j + 2].isEmpty()) {
                x[j] = Double.NaN;
            } else {
                x[j] = Double.valueOf(tokens[j + 2]);
            }
        }

        Datum<double[]> datum = new Datum<>(x);
        datum.name = tokens[0];
        datum.description = tokens[1];
        data.add(datum);
    }

    reader.close();
    return data;
}
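For illustration, the sketch below feeds GCTParser a tiny in-memory GCT document: a version line, a "rows and columns" size line, a title header (name, description, then sample names), and one tab-separated row per gene. The two-gene payload, the example class name, and the smile.data.parser.microarray import path are assumptions made for this example; the parse(String, InputStream) signature and the toArray conversion come from the code above. Note how an empty field becomes Double.NaN.

// Hypothetical usage sketch for GCTParser; the sample data below is made up.
import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import smile.data.AttributeDataset;
import smile.data.parser.microarray.GCTParser;   // assumed package location (Smile 1.x)

public class GCTParserExample {
    public static void main(String[] args) throws Exception {
        // Version line, "rows<TAB>columns" line, title header, then one data row per gene.
        String gct = "#1.2\n"
                   + "2\t3\n"
                   + "Name\tDescription\tS1\tS2\tS3\n"
                   + "G1\tgene one\t0.1\t0.2\t0.3\n"
                   + "G2\tgene two\t1.5\t\t2.5\n";   // empty field is parsed as Double.NaN

        GCTParser parser = new GCTParser();
        AttributeDataset data = parser.parse("toy GCT",
                new ByteArrayInputStream(gct.getBytes(StandardCharsets.UTF_8)));

        double[][] x = data.toArray(new double[data.size()][]);
        System.out.println(data.size() + " rows x " + x[0].length + " columns");  // 2 rows x 3 columns
    }
}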
Use of smile.data.AttributeDataset in project smile by haifengl.
Class PCLParser, method parse.
/**
 * Parse a PCL dataset from an input stream.
 * @param name the name of the dataset.
 * @param stream the input stream of data.
 * @return the parsed dataset.
 * @throws java.io.IOException if the stream cannot be read or the file is not a valid PCL file.
 */
public AttributeDataset parse(String name, InputStream stream) throws IOException, ParseException {
    BufferedReader reader = new BufferedReader(new InputStreamReader(stream));

    String line = reader.readLine();
    if (line == null) {
        throw new IOException("Empty data source.");
    }

    String[] tokens = line.split("\t", -1);
    int p = tokens.length - 3;

    line = reader.readLine();
    if (line == null) {
        throw new IOException("Premature end of file.");
    }

    String[] weight = line.split("\t", -1);
    if (weight.length != tokens.length) {
        throw new IOException("Invalid sample weight header.");
    }

    Attribute[] attributes = new Attribute[p];
    for (int i = 0; i < p; i++) {
        attributes[i] = new NumericAttribute(tokens[i + 3], null, Double.valueOf(weight[i + 3]));
    }

    AttributeDataset data = new AttributeDataset(name, attributes);
    for (int i = 3; (line = reader.readLine()) != null; i++) {
        tokens = line.split("\t", -1);
        if (tokens.length != weight.length) {
            throw new IOException(String.format("Invalid number of elements of line %d: %d", i, tokens.length));
        }

        double[] x = new double[p];
        for (int j = 0; j < p; j++) {
            if (tokens[j + 3].isEmpty()) {
                x[j] = Double.NaN;
            } else {
                x[j] = Double.valueOf(tokens[j + 3]);
            }
        }

        Datum<double[]> datum = new Datum<>(x);
        datum.name = tokens[0];
        datum.description = tokens[1];
        datum.weight = Double.valueOf(tokens[2]);
        data.add(datum);
    }

    reader.close();
    return data;
}
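A similar sketch for PCLParser: the header row names the samples from the fourth column onward, the second row carries per-sample weights, and each subsequent row holds an id, a description, a row weight, and the expression values. The in-memory payload, the example class name, and the import path are illustrative assumptions; the parse(String, InputStream) signature comes from the code above.

// Hypothetical usage sketch for PCLParser; the sample data below is made up.
import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import smile.data.AttributeDataset;
import smile.data.parser.microarray.PCLParser;   // assumed package location (Smile 1.x)

public class PCLParserExample {
    public static void main(String[] args) throws Exception {
        // Header: id, name, GWEIGHT, then sample names; second line: per-sample weights.
        String pcl = "YORF\tNAME\tGWEIGHT\tS1\tS2\n"
                   + "EWEIGHT\t\t\t1.0\t1.0\n"
                   + "G1\tgene one\t1.0\t0.1\t0.2\n"
                   + "G2\tgene two\t1.0\t1.5\t\n";   // trailing empty field is parsed as Double.NaN

        PCLParser parser = new PCLParser();
        AttributeDataset data = parser.parse("toy PCL",
                new ByteArrayInputStream(pcl.getBytes(StandardCharsets.UTF_8)));

        double[][] x = data.toArray(new double[data.size()][]);
        System.out.println(data.size() + " rows x " + x[0].length + " columns");  // 2 rows x 2 columns
    }
}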