use of smile.data.NominalAttribute in project smile by haifengl.
the class NumericAttributeFeatureTest method testLOGARITHM.
/**
 * Test of f method, of class NumericAttributeFeature, using LOGARITHM scaling.
 *
 * Every value of the USPS training set is shifted by 2.0 before the feature
 * is evaluated (presumably to keep the inputs of the logarithm positive;
 * confirm against the value range of the data). Each generated feature is
 * then compared with {@code Math.log} of the shifted input.
 *
 * A failure to load or parse the data file fails the test instead of being
 * printed and ignored.
 */
@Test
public void testLOGARITHM() {
    System.out.println("LOGARITHM");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset data = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        double[][] x = data.toArray(new double[data.size()][]);

        // Shift every value in place before applying the logarithm.
        for (int i = 0; i < x.length; i++) {
            for (int j = 0; j < x[i].length; j++) {
                x[i][j] += 2.0;
            }
        }

        NumericAttributeFeature naf = new NumericAttributeFeature(data.attributes(), NumericAttributeFeature.Scaling.LOGARITHM);
        Attribute[] attributes = naf.attributes();
        assertEquals(256, attributes.length);

        for (int i = 0; i < x.length; i++) {
            double[] y = new double[attributes.length];
            for (int j = 0; j < y.length; j++) {
                y[j] = naf.f(x[i], j);
                assertEquals(Math.log(x[i][j]), y[j], 1E-7);
            }
        }
    } catch (Exception ex) {
        // Printing and returning would report a green test even though no
        // assertion ran; surface the problem as a failure with its cause.
        throw new AssertionError("LOGARITHM test could not run: " + ex, ex);
    }
}
use of smile.data.NominalAttribute in project smile by haifengl.
the class SumSquaresRatioTest method testLearn.
/**
 * Test of learn method, of class SumSquaresRatio.
 *
 * Ranks the USPS features with SumSquaresRatio, keeps the {@code p} features
 * at the end of the sort order returned by {@code QuickSort.sort(score)}
 * (presumably the highest-scoring ones, assuming an ascending sort; confirm),
 * trains an LDA on them and prints the accuracy on the test set.
 *
 * A failure to load or parse the data files fails the test instead of being
 * printed and ignored.
 */
@Test
public void testLearn() {
    System.out.println("USPS");
    try {
        DelimitedTextParser parser = new DelimitedTextParser();
        parser.setResponseIndex(new NominalAttribute("class"), 0);
        AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));

        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);
        double[][] testx = test.toArray(new double[test.size()][]);
        int[] testy = test.toArray(new int[test.size()]);

        SumSquaresRatio ssr = new SumSquaresRatio();
        double[] score = ssr.rank(x, y);
        int[] index = QuickSort.sort(score);

        // Number of features to keep.
        int p = 135;
        // Walk the ordering from its last entry backwards. Deriving the
        // position from the array length (instead of a literal 255) keeps the
        // selection valid for any number of features.
        int last = index.length - 1;

        int n = x.length;
        double[][] xx = new double[n][p];
        for (int j = 0; j < p; j++) {
            int column = index[last - j];
            for (int i = 0; i < n; i++) {
                xx[i][j] = x[i][column];
            }
        }

        int testn = testx.length;
        double[][] testxx = new double[testn][p];
        for (int j = 0; j < p; j++) {
            int column = index[last - j];
            for (int i = 0; i < testn; i++) {
                testxx[i][j] = testx[i][column];
            }
        }

        LDA lda = new LDA(xx, y);
        int[] prediction = new int[testn];
        for (int i = 0; i < testn; i++) {
            prediction[i] = lda.predict(testxx[i]);
        }

        double accuracy = new Accuracy().measure(testy, prediction);
        System.out.format("SSR %.2f%%%n", 100 * accuracy);
    } catch (Exception ex) {
        // Printing and returning would report a green test even though
        // nothing was exercised; surface the problem as a failure.
        throw new AssertionError("SumSquaresRatio test could not run: " + ex, ex);
    }
}
use of smile.data.NominalAttribute in project smile by haifengl.
the class CRFTest method load.
/**
 * Reads a sequence dataset from a test resource.
 *
 * The first line holds three space-separated integers: a sequence count
 * (parsed but not used), {@code k} and {@code p}. Every following line holds
 * a sequence id, a position (parsed but not used), a feature count
 * {@code len}, {@code len} feature tokens and finally an integer label.
 * Consecutive lines with the same sequence id form one sequence.
 *
 * Feature tokens that cannot be parsed are reported on standard error and
 * left at 0.0. An I/O error is reported on standard error and whatever was
 * collected up to that point is returned.
 *
 * @param resource the test data resource to read
 * @param attributes the feature attributes; when {@code null}, nominal
 * attributes named "Attr1", "Attr2", ... are created from the feature count
 * of the first data line
 * @return the populated dataset
 */
Dataset load(String resource, Attribute[] attributes) {
    Dataset result = new Dataset();
    result.attributes = attributes;

    ArrayList<double[][]> sequences = new ArrayList<>();
    ArrayList<int[]> labelings = new ArrayList<>();
    ArrayList<double[]> currentRows = new ArrayList<>();
    ArrayList<Integer> currentLabels = new ArrayList<>();

    int numFeatures = 0;
    int numClasses = 0;
    int currentId = 1;

    try (BufferedReader reader = smile.data.parser.IOUtils.getTestDataReader(resource)) {
        String[] header = reader.readLine().split(" ");
        // The sequence count is parsed (so a malformed header still fails) but is not needed.
        Integer.parseInt(header[0]);
        numClasses = Integer.parseInt(header[1]);
        numFeatures = Integer.parseInt(header[2]);

        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
            String[] tokens = line.split(" ");
            int seqid = Integer.parseInt(tokens[0]);
            // The position within the sequence is parsed but not needed.
            Integer.parseInt(tokens[1]);
            int width = Integer.parseInt(tokens[2]);

            // Lazily create default attributes from the first data line.
            if (result.attributes == null) {
                Attribute[] generated = new Attribute[width];
                for (int a = 0; a < width; a++) {
                    generated[a] = new NominalAttribute("Attr" + (a + 1));
                }
                result.attributes = generated;
            }

            double[] row = new double[width];
            for (int c = 0; c < width; c++) {
                try {
                    row[c] = result.attributes[c].valueOf(tokens[c + 3]);
                } catch (ParseException ex) {
                    System.err.println(ex);
                }
            }

            // A new sequence id closes the sequence collected so far.
            if (seqid != currentId) {
                currentId = seqid;

                int[] closedLabels = new int[currentRows.size()];
                for (int r = 0; r < closedLabels.length; r++) {
                    closedLabels[r] = currentLabels.get(r);
                }
                sequences.add(currentRows.toArray(new double[currentRows.size()][]));
                labelings.add(closedLabels);

                currentRows = new ArrayList<>();
                currentLabels = new ArrayList<>();
            }

            currentRows.add(row);
            currentLabels.add(Integer.valueOf(tokens[width + 3]));
        }

        // Close the last sequence once the input is exhausted.
        int[] closedLabels = new int[currentRows.size()];
        for (int r = 0; r < closedLabels.length; r++) {
            closedLabels[r] = currentLabels.get(r);
        }
        sequences.add(currentRows.toArray(new double[currentRows.size()][]));
        labelings.add(closedLabels);
    } catch (IOException ex) {
        System.err.println(ex);
    }

    result.p = numFeatures;
    result.k = numClasses;
    result.x = sequences.toArray(new double[sequences.size()][][]);
    result.y = labelings.toArray(new int[labelings.size()][]);
    return result;
}
use of smile.data.NominalAttribute in project smile by haifengl.
the class ValidationTest method testTest_3args_1.
/**
 * Test of test method, of class Validation.
 *
 * Trains an LDA on the USPS training set and checks the accuracy that
 * {@code Validation.test} reports on the USPS test set.
 *
 * A failure to load or parse the data files fails the test instead of being
 * printed and ignored.
 */
@Test
public void testTest_3args_1() {
    System.out.println("test");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));

        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);
        double[][] testx = test.toArray(new double[test.size()][]);
        int[] testy = test.toArray(new int[test.size()]);

        LDA lda = new LDA(x, y);
        double accuracy = Validation.test(lda, testx, testy);
        System.out.println("accuracy = " + accuracy);
        assertEquals(0.8724, accuracy, 1E-4);
    } catch (Exception ex) {
        // Printing and returning would skip the accuracy assertion and still
        // report a green test; surface the problem as a failure.
        throw new AssertionError("Validation test could not run: " + ex, ex);
    }
}
use of smile.data.NominalAttribute in project smile by haifengl.
the class ArffParser method parseAttribute.
/**
* Parses one attribute declaration from the tokenizer.
*
* The attribute name is read first. If the token after it is a word, it is
* matched case-insensitively against the numeric, string, date, relational
* and end-of-subrelation keywords; any other token is treated as the start of
* a brace-delimited enumeration of nominal values.
*
* @param tokenizer the tokenizer the declaration is read from
* @return the parsed attribute, or {@code null} when the declaration is a
* relational attribute or an end-of-subrelation marker (no attribute object
* is created in those branches)
* @throws IOException if the information is not read
* successfully
* @throws ParseException if the type keyword is not recognized, the date
* format token is neither a word nor a quoted string, the enumeration does
* not start with '{' or is not closed by '}' before the end of the line, or
* the end of file is reached right after the declaration
*/
private Attribute parseAttribute(StreamTokenizer tokenizer) throws IOException, ParseException {
Attribute attribute = null;
// Get attribute name.
getNextToken(tokenizer);
String attributeName = tokenizer.sval;
getNextToken(tokenizer);
// A word token is a type keyword; anything else is handled as a nominal enumeration.
if (tokenizer.ttype == StreamTokenizer.TT_WORD) {
// Attribute is real, integer, or numeric: all map to a numeric attribute.
if (tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_REAL) || tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_INTEGER) || tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_NUMERIC)) {
attribute = new NumericAttribute(attributeName);
readTillEOL(tokenizer);
} else if (tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_STRING)) {
attribute = new StringAttribute(attributeName);
readTillEOL(tokenizer);
} else if (tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_DATE)) {
// An optional date format may follow the keyword on the same line;
// format stays null when the line ends right after the keyword.
String format = null;
if (tokenizer.nextToken() != StreamTokenizer.TT_EOL) {
// The format must be a bare word or a single/double quoted string.
if ((tokenizer.ttype != StreamTokenizer.TT_WORD) && (tokenizer.ttype != '\'') && (tokenizer.ttype != '\"')) {
throw new ParseException("not a valid date format", tokenizer.lineno());
}
format = tokenizer.sval;
readTillEOL(tokenizer);
} else {
// Put the end-of-line token back so it is seen again by the next read.
tokenizer.pushBack();
}
attribute = new DateAttribute(attributeName, null, format);
// NOTE(review): readTillEOL may already have run above when a format was
// present; presumably a second call is harmless - confirm against readTillEOL.
readTillEOL(tokenizer);
} else if (tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_RELATIONAL)) {
// No attribute is created for a relational declaration; attribute stays null.
readTillEOL(tokenizer);
} else if (tokenizer.sval.equalsIgnoreCase(ARFF_END_SUBRELATION)) {
// No attribute is created for an end-of-subrelation marker; attribute stays null.
getNextToken(tokenizer);
} else {
throw new ParseException("Invalid attribute type or invalid enumeration", tokenizer.lineno());
}
} else {
// Attribute is nominal.
List<String> attributeValues = new ArrayList<>();
// Push the current token back so the nextToken() call below re-reads it
// and can check that the enumeration opens with '{'.
tokenizer.pushBack();
// Get values for nominal attribute.
if (tokenizer.nextToken() != '{') {
throw new ParseException("{ expected at beginning of enumeration", tokenizer.lineno());
}
// Every token up to the closing '}' is taken as one value.
// NOTE(review): sval is null for tokens that are neither words nor quoted
// strings, so this assumes the tokenizer is configured so that separators
// are skipped and values arrive as words or quoted strings - confirm.
while (tokenizer.nextToken() != '}') {
if (tokenizer.ttype == StreamTokenizer.TT_EOL) {
throw new ParseException("} expected at end of enumeration", tokenizer.lineno());
} else {
attributeValues.add(tokenizer.sval.trim());
}
}
// Copy the collected values into an array for the NominalAttribute constructor.
String[] values = new String[attributeValues.size()];
for (int i = 0; i < values.length; i++) {
values[i] = attributeValues.get(i);
}
attribute = new NominalAttribute(attributeName, values);
}
// getLastToken and getFirstToken are defined elsewhere in this class;
// presumably they finish the current line and advance to the first token
// of the next one - confirm against their definitions.
getLastToken(tokenizer, false);
getFirstToken(tokenizer);
// Reaching the end of file right after a declaration is reported as an error.
if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
throw new ParseException(PREMATURE_END_OF_FILE, tokenizer.lineno());
}
return attribute;
}
Aggregations