Search in sources :

Example 21 with Attribute

use of smile.data.Attribute in project smile by haifengl.

the class TXTParserTest method testParse.

/**
 * Test of parse method, of class TXTParser.
 *
 * <p>Parses the test data file {@code microarray/Dunham2002.txt} and checks the
 * dataset size, the attribute count and type, and a few values in the first and
 * last rows.
 *
 * @throws Exception if the test data cannot be located or parsed; the exception
 *         is deliberately propagated so the test fails instead of passing
 *         without having executed any assertion
 */
@Test
public void testParse() throws Exception {
    System.out.println("parse");
    TXTParser parser = new TXTParser();
    AttributeDataset data = parser.parse("PCL", smile.data.parser.IOUtils.getTestDataFile("microarray/Dunham2002.txt"));
    double[][] x = data.toArray(new double[data.size()][]);
    String[] id = data.toArray(new String[data.size()]);

    // Every column is expected to be numeric.
    for (Attribute attribute : data.attributes()) {
        assertEquals(Attribute.Type.NUMERIC, attribute.getType());
        System.out.println(attribute.getName());
    }

    assertEquals(6694, data.size());
    assertEquals(16, data.attributes().length);

    // First row.
    assertEquals("YKR005C", id[0]);
    assertEquals(-0.43, x[0][0], 1E-7);
    assertEquals(-0.47, x[0][1], 1E-7);
    assertEquals(-0.39, x[0][2], 1E-7);

    // Last row.
    assertEquals("YKR004C", id[6693]);
    assertEquals(0.03, x[6693][13], 1E-7);
    assertEquals(-0.53, x[6693][14], 1E-7);
    assertEquals(0.3, x[6693][15], 1E-7);
}
Also used : AttributeDataset(smile.data.AttributeDataset) Attribute(smile.data.Attribute) Test(org.junit.Test)

Example 22 with Attribute

use of smile.data.Attribute in project smile by haifengl.

the class ArffParser method parseAttribute.

/**
 * Parses a single attribute declaration.
 *
 * <p>The attribute name is read first, then its type. A word token selects a
 * numeric (real, integer or numeric), string or date attribute; any other
 * token is expected to open a brace-delimited enumeration of nominal values.
 *
 * @param tokenizer the tokenizer to read the declaration from
 * @return the parsed attribute, or {@code null} when the declaration is a
 *         relational attribute or an end-of-subrelation marker, for which no
 *         attribute object is built here
 * @throws IOException if the information is not read successfully
 * @throws ParseException if the declaration is malformed or the input ends
 *         prematurely
 */
private Attribute parseAttribute(StreamTokenizer tokenizer) throws IOException, ParseException {
    Attribute attribute = null;
    // Get attribute name.
    getNextToken(tokenizer);
    String attributeName = tokenizer.sval;
    getNextToken(tokenizer);
    // A word token names a built-in type; anything else must start a nominal enumeration.
    if (tokenizer.ttype == StreamTokenizer.TT_WORD) {
        // Attribute is real, integer, string, date, or a relational marker.
        if (tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_REAL) || tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_INTEGER) || tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_NUMERIC)) {
            attribute = new NumericAttribute(attributeName);
            readTillEOL(tokenizer);
        } else if (tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_STRING)) {
            attribute = new StringAttribute(attributeName);
            readTillEOL(tokenizer);
        } else if (tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_DATE)) {
            // The date format is optional; it stays null when the line ends right after the type.
            String format = null;
            if (tokenizer.nextToken() != StreamTokenizer.TT_EOL) {
                if ((tokenizer.ttype != StreamTokenizer.TT_WORD) && (tokenizer.ttype != '\'') && (tokenizer.ttype != '\"')) {
                    throw new ParseException("not a valid date format", tokenizer.lineno());
                }
                format = tokenizer.sval;
                readTillEOL(tokenizer);
            } else {
                tokenizer.pushBack();
            }
            attribute = new DateAttribute(attributeName, null, format);
            // NOTE(review): readTillEOL may already have run above; kept as in the
            // original on the assumption that it is a no-op at end of line — confirm.
            readTillEOL(tokenizer);
        } else if (tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_RELATIONAL)) {
            // No attribute is created for a relational declaration; null is returned.
            readTillEOL(tokenizer);
        } else if (tokenizer.sval.equalsIgnoreCase(ARFF_END_SUBRELATION)) {
            getNextToken(tokenizer);
        } else {
            throw new ParseException("Invalid attribute type or invalid enumeration", tokenizer.lineno());
        }
    } else {
        // Attribute is nominal.
        List<String> attributeValues = new ArrayList<>();
        tokenizer.pushBack();
        // Get values for nominal attribute.
        if (tokenizer.nextToken() != '{') {
            throw new ParseException("{ expected at beginning of enumeration", tokenizer.lineno());
        }
        while (tokenizer.nextToken() != '}') {
            // End of line or end of input before the closing brace: the enumeration
            // is unterminated. Without the EOF check the null sval below would
            // surface as a NullPointerException.
            if (tokenizer.ttype == StreamTokenizer.TT_EOL || tokenizer.ttype == StreamTokenizer.TT_EOF) {
                throw new ParseException("} expected at end of enumeration", tokenizer.lineno());
            }
            // Only word and quoted-string tokens carry a string value.
            if (tokenizer.sval == null) {
                throw new ParseException("Invalid attribute type or invalid enumeration", tokenizer.lineno());
            }
            attributeValues.add(tokenizer.sval.trim());
        }
        String[] values = attributeValues.toArray(new String[0]);
        attribute = new NominalAttribute(attributeName, values);
    }
    getLastToken(tokenizer, false);
    getFirstToken(tokenizer);
    if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
        throw new ParseException(PREMATURE_END_OF_FILE, tokenizer.lineno());
    }
    return attribute;
}
Also used : NominalAttribute(smile.data.NominalAttribute) Attribute(smile.data.Attribute) NominalAttribute(smile.data.NominalAttribute) NumericAttribute(smile.data.NumericAttribute) DateAttribute(smile.data.DateAttribute) StringAttribute(smile.data.StringAttribute) StringAttribute(smile.data.StringAttribute) ArrayList(java.util.ArrayList) ParseException(java.text.ParseException) NumericAttribute(smile.data.NumericAttribute) DateAttribute(smile.data.DateAttribute)

Example 23 with Attribute

use of smile.data.Attribute in project smile by haifengl.

the class NumericAttributeFeatureTest method testSTANDARDIZATION.

/**
 * Test of f method, of class NumericAttributeFeature.
 *
 * <p>Builds a feature generator with {@code Scaling.STANDARDIZATION} over the
 * {@code usps/zip.train} test data and checks that each generated feature equals
 * {@code (x - columnMean) / columnSd} for the corresponding input value.
 */
@Test
public void testSTANDARDIZATION() {
    System.out.println("STANDARDIZATION");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset data = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        double[][] x = data.toArray(new double[data.size()][]);
        double[] mean = Math.colMean(x);
        double[] sd = Math.colSd(x);
        NumericAttributeFeature naf = new NumericAttributeFeature(data.attributes(), NumericAttributeFeature.Scaling.STANDARDIZATION, x);
        Attribute[] attributes = naf.attributes();
        assertEquals(256, attributes.length);
        for (int i = 0; i < x.length; i++) {
            for (int j = 0; j < attributes.length; j++) {
                double y = naf.f(x[i], j);
                assertEquals((x[i][j] - mean[j]) / sd[j], y, 1E-7);
            }
        }
    } catch (Exception ex) {
        // Fail loudly: a swallowed exception would let the test pass without
        // having run any assertion.
        throw new AssertionError("Unexpected exception in testSTANDARDIZATION", ex);
    }
}
Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) AttributeDataset(smile.data.AttributeDataset) NominalAttribute(smile.data.NominalAttribute) NominalAttribute(smile.data.NominalAttribute) Attribute(smile.data.Attribute) Test(org.junit.Test)

Example 24 with Attribute

use of smile.data.Attribute in project smile by haifengl.

the class NumericAttributeFeatureTest method testNONE.

/**
 * Test of f method, of class NumericAttributeFeature.
 *
 * <p>Builds a feature generator with {@code Scaling.NONE} over the
 * {@code usps/zip.train} test data and checks that each generated feature equals
 * the corresponding input value.
 */
@Test
public void testNONE() {
    System.out.println("NONE");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset data = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        double[][] x = data.toArray(new double[data.size()][]);
        NumericAttributeFeature naf = new NumericAttributeFeature(data.attributes(), NumericAttributeFeature.Scaling.NONE);
        Attribute[] attributes = naf.attributes();
        assertEquals(256, attributes.length);
        for (int i = 0; i < x.length; i++) {
            for (int j = 0; j < attributes.length; j++) {
                double y = naf.f(x[i], j);
                assertEquals(x[i][j], y, 1E-7);
            }
        }
    } catch (Exception ex) {
        // Fail loudly: a swallowed exception would let the test pass without
        // having run any assertion.
        throw new AssertionError("Unexpected exception in testNONE", ex);
    }
}
Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) AttributeDataset(smile.data.AttributeDataset) NominalAttribute(smile.data.NominalAttribute) NominalAttribute(smile.data.NominalAttribute) Attribute(smile.data.Attribute) Test(org.junit.Test)

Example 25 with Attribute

use of smile.data.Attribute in project smile by haifengl.

the class NumericAttributeFeatureTest method testAttributes.

/**
 * Test of attributes method, of class NumericAttributeFeature.
 *
 * <p>Builds a feature generator with {@code Scaling.LOGARITHM} over the
 * attributes of the {@code usps/zip.train} test data and checks that it exposes
 * 256 attributes, all of numeric type.
 */
@Test
public void testAttributes() {
    System.out.println("attributes");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset data = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        NumericAttributeFeature naf = new NumericAttributeFeature(data.attributes(), NumericAttributeFeature.Scaling.LOGARITHM);
        Attribute[] attributes = naf.attributes();
        assertEquals(256, attributes.length);
        for (Attribute attribute : attributes) {
            System.out.println(attribute);
            assertEquals(Attribute.Type.NUMERIC, attribute.getType());
        }
    } catch (Exception ex) {
        // Fail loudly: a swallowed exception would let the test pass without
        // having run any assertion.
        throw new AssertionError("Unexpected exception in testAttributes", ex);
    }
}
Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) AttributeDataset(smile.data.AttributeDataset) NominalAttribute(smile.data.NominalAttribute) NominalAttribute(smile.data.NominalAttribute) Attribute(smile.data.Attribute) Test(org.junit.Test)

Aggregations

Attribute (smile.data.Attribute)35 AttributeDataset (smile.data.AttributeDataset)29 Test (org.junit.Test)24 NominalAttribute (smile.data.NominalAttribute)15 ArffParser (smile.data.parser.ArffParser)8 DelimitedTextParser (smile.data.parser.DelimitedTextParser)8 BufferedReader (java.io.BufferedReader)7 InputStreamReader (java.io.InputStreamReader)7 NumericAttribute (smile.data.NumericAttribute)7 IOException (java.io.IOException)6 Datum (smile.data.Datum)4 GridLayout (java.awt.GridLayout)3 ArrayList (java.util.ArrayList)3 JPanel (javax.swing.JPanel)3 DateAttribute (smile.data.DateAttribute)3 StringAttribute (smile.data.StringAttribute)3 PlotCanvas (smile.plot.PlotCanvas)3 Reader (java.io.Reader)2 StreamTokenizer (java.io.StreamTokenizer)2 ParseException (java.text.ParseException)1