use of smile.data.Attribute in project smile by haifengl.
the class TXTParserTest method testParse.
/**
 * Test of parse method, of class TXTParser.
 *
 * <p>Parses the Dunham2002 microarray test file under the name "PCL" and
 * checks the attribute types, the dataset dimensions, and the identifiers
 * and sample values of the first and last rows.
 *
 * @throws Exception if locating or parsing the test data fails; the
 *         exception is deliberately propagated so that such a failure is
 *         reported as a test failure rather than a silent pass
 */
@Test
public void testParse() throws Exception {
    System.out.println("parse");
    TXTParser parser = new TXTParser();

    // No try/catch here: swallowing the exception would skip every
    // assertion below and still report the test as successful.
    AttributeDataset data = parser.parse("PCL", smile.data.parser.IOUtils.getTestDataFile("microarray/Dunham2002.txt"));
    double[][] x = data.toArray(new double[data.size()][]);
    String[] id = data.toArray(new String[data.size()]);

    for (Attribute attribute : data.attributes()) {
        assertEquals(Attribute.Type.NUMERIC, attribute.getType());
        System.out.println(attribute.getName());
    }

    assertEquals(6694, data.size());
    assertEquals(16, data.attributes().length);

    // First row.
    assertEquals("YKR005C", id[0]);
    assertEquals(-0.43, x[0][0], 1E-7);
    assertEquals(-0.47, x[0][1], 1E-7);
    assertEquals(-0.39, x[0][2], 1E-7);

    // Last row.
    assertEquals("YKR004C", id[6693]);
    assertEquals(0.03, x[6693][13], 1E-7);
    assertEquals(-0.53, x[6693][14], 1E-7);
    assertEquals(0.3, x[6693][15], 1E-7);
}
use of smile.data.Attribute in project smile by haifengl.
the class ArffParser method parseAttribute.
/**
 * Parses a single attribute declaration.
 *
 * <p>The first token obtained through {@code getNextToken} is taken as the
 * attribute name and the following token determines the kind of attribute:
 * a real/integer/numeric keyword yields a {@link NumericAttribute}, the
 * string keyword a {@link StringAttribute}, the date keyword (with an
 * optional format token) a {@link DateAttribute}, and anything that is not
 * a word token must open a brace-delimited enumeration, which yields a
 * {@link NominalAttribute}. Relational declarations and end-of-sub-relation
 * markers are consumed without creating an attribute.
 *
 * @param tokenizer the tokenizer to read the declaration from
 * @return the parsed attribute, or {@code null} when the declaration was a
 *         relational attribute or an end-of-sub-relation marker
 * @throws IOException if reading from the tokenizer fails
 * @throws ParseException if the attribute type is not recognized, a date
 *         format token is invalid, an enumeration is not properly
 *         delimited by braces, or the input ends prematurely
 */
private Attribute parseAttribute(StreamTokenizer tokenizer) throws IOException, ParseException {
    Attribute attribute = null;

    // Get attribute name.
    getNextToken(tokenizer);
    String attributeName = tokenizer.sval;
    getNextToken(tokenizer);

    // A word token names a keyword type; any other token is expected to
    // open a nominal enumeration.
    if (tokenizer.ttype == StreamTokenizer.TT_WORD) {
        // Attribute is real, integer, string, date or relational.
        if (tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_REAL) || tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_INTEGER) || tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_NUMERIC)) {
            attribute = new NumericAttribute(attributeName);
            readTillEOL(tokenizer);
        } else if (tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_STRING)) {
            attribute = new StringAttribute(attributeName);
            readTillEOL(tokenizer);
        } else if (tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_DATE)) {
            // The date format is optional; it stays null when the line ends
            // right after the date keyword.
            String format = null;
            if (tokenizer.nextToken() != StreamTokenizer.TT_EOL) {
                if ((tokenizer.ttype != StreamTokenizer.TT_WORD) && (tokenizer.ttype != '\'') && (tokenizer.ttype != '\"')) {
                    throw new ParseException("not a valid date format", tokenizer.lineno());
                }
                format = tokenizer.sval;
                readTillEOL(tokenizer);
            } else {
                tokenizer.pushBack();
            }
            attribute = new DateAttribute(attributeName, null, format);
            readTillEOL(tokenizer);
        } else if (tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_RELATIONAL)) {
            // No attribute is created for a relational declaration.
            readTillEOL(tokenizer);
        } else if (tokenizer.sval.equalsIgnoreCase(ARFF_END_SUBRELATION)) {
            getNextToken(tokenizer);
        } else {
            throw new ParseException("Invalid attribute type or invalid enumeration", tokenizer.lineno());
        }
    } else {
        // Attribute is nominal.
        List<String> attributeValues = new ArrayList<>();

        // Re-read the current token so the opening brace check goes through
        // nextToken().
        tokenizer.pushBack();
        if (tokenizer.nextToken() != '{') {
            throw new ParseException("{ expected at beginning of enumeration", tokenizer.lineno());
        }

        // Get values for nominal attribute.
        while (tokenizer.nextToken() != '}') {
            if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
                // At end of input sval is null; report a parse error
                // instead of failing with a NullPointerException below.
                throw new ParseException(PREMATURE_END_OF_FILE, tokenizer.lineno());
            }
            if (tokenizer.ttype == StreamTokenizer.TT_EOL) {
                throw new ParseException("} expected at end of enumeration", tokenizer.lineno());
            }
            attributeValues.add(tokenizer.sval.trim());
        }

        attribute = new NominalAttribute(attributeName, attributeValues.toArray(new String[0]));
    }

    getLastToken(tokenizer, false);
    getFirstToken(tokenizer);
    if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
        throw new ParseException(PREMATURE_END_OF_FILE, tokenizer.lineno());
    }

    return attribute;
}
use of smile.data.Attribute in project smile by haifengl.
the class NumericAttributeFeatureTest method testSTANDARDIZATION.
/**
 * Test of f method, of class NumericAttributeFeature, using
 * {@code Scaling.STANDARDIZATION}.
 *
 * <p>Loads the USPS training data and checks that every generated feature
 * value equals {@code (x - mean) / sd}, where the column means and standard
 * deviations are computed from the same data.
 */
@Test
public void testSTANDARDIZATION() {
    System.out.println("STANDARDIZATION");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset data = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        double[][] x = data.toArray(new double[data.size()][]);
        double[] mean = Math.colMean(x);
        double[] sd = Math.colSd(x);

        NumericAttributeFeature naf = new NumericAttributeFeature(data.attributes(), NumericAttributeFeature.Scaling.STANDARDIZATION, x);
        Attribute[] attributes = naf.attributes();
        assertEquals(256, attributes.length);

        for (int i = 0; i < x.length; i++) {
            for (int j = 0; j < attributes.length; j++) {
                double y = naf.f(x[i], j);
                assertEquals((x[i][j] - mean[j]) / sd[j], y, 1E-7);
            }
        }
    } catch (Exception ex) {
        // Fail the test instead of printing and passing: a swallowed
        // exception would skip the assertions above unnoticed.
        throw new AssertionError("testSTANDARDIZATION failed with an unexpected exception: " + ex, ex);
    }
}
use of smile.data.Attribute in project smile by haifengl.
the class NumericAttributeFeatureTest method testNONE.
/**
 * Test of f method, of class NumericAttributeFeature, using
 * {@code Scaling.NONE}.
 *
 * <p>Loads the USPS training data and checks that every generated feature
 * value equals the corresponding input value.
 */
@Test
public void testNONE() {
    System.out.println("NONE");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset data = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        double[][] x = data.toArray(new double[data.size()][]);

        NumericAttributeFeature naf = new NumericAttributeFeature(data.attributes(), NumericAttributeFeature.Scaling.NONE);
        Attribute[] attributes = naf.attributes();
        assertEquals(256, attributes.length);

        for (int i = 0; i < x.length; i++) {
            for (int j = 0; j < attributes.length; j++) {
                double y = naf.f(x[i], j);
                assertEquals(x[i][j], y, 1E-7);
            }
        }
    } catch (Exception ex) {
        // Fail the test instead of printing and passing: a swallowed
        // exception would skip the assertions above unnoticed.
        throw new AssertionError("testNONE failed with an unexpected exception: " + ex, ex);
    }
}
use of smile.data.Attribute in project smile by haifengl.
the class NumericAttributeFeatureTest method testAttributes.
/**
 * Test of attributes method, of class NumericAttributeFeature.
 *
 * <p>Loads the USPS training data, builds a feature generator with
 * {@code Scaling.LOGARITHM}, and checks that it exposes 256 attributes,
 * all of numeric type.
 */
@SuppressWarnings("unused")
@Test
public void testAttributes() {
    System.out.println("attributes");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset data = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        // Not read afterwards; kept so the conversion call is still made.
        double[][] x = data.toArray(new double[data.size()][]);

        NumericAttributeFeature naf = new NumericAttributeFeature(data.attributes(), NumericAttributeFeature.Scaling.LOGARITHM);
        Attribute[] attributes = naf.attributes();
        assertEquals(256, attributes.length);

        for (Attribute attribute : attributes) {
            System.out.println(attribute);
            assertEquals(Attribute.Type.NUMERIC, attribute.getType());
        }
    } catch (Exception ex) {
        // Fail the test instead of printing and passing: a swallowed
        // exception would skip the assertions above unnoticed.
        throw new AssertionError("testAttributes failed with an unexpected exception: " + ex, ex);
    }
}
Aggregations