Search in sources :

Example 1 with StringAttribute

use of smile.data.StringAttribute in project smile by haifengl.

The parse method of the class ArffParser.

/**
 * Reads a complete dataset (header followed by data rows) from the given stream.
 *
 * <p>The stream is wrapped in a reader using the platform default charset and
 * is closed when this method returns or throws.
 *
 * @param stream the input to read the dataset from
 * @return the dataset built from the header and every instance that follows it
 * @throws IOException if reading fails or the header declares no attributes
 * @throws ParseException propagated from the header/instance readers
 */
public AttributeDataset parse(InputStream stream) throws IOException, ParseException {
    try (Reader reader = new BufferedReader(new InputStreamReader(stream))) {
        StreamTokenizer tokenizer = new StreamTokenizer(reader);
        initTokenizer(tokenizer);

        List<Attribute> attributes = new ArrayList<>();
        String relationName = readHeader(tokenizer, attributes);
        if (attributes.isEmpty()) {
            throw new IOException("no header information available");
        }

        // Snapshot of every declared attribute, taken BEFORE the response is
        // pulled out of the list; the instance readers are handed this full array.
        Attribute[] declared = attributes.toArray(new Attribute[attributes.size()]);

        // Locate the response column, if responseIndex points inside the list,
        // and detach it from the explanatory attributes.
        Attribute response = null;
        int position = 0;
        while (position < attributes.size() && responseIndex != position) {
            position++;
        }
        if (position < attributes.size()) {
            response = attributes.remove(position);
        }

        Attribute[] remaining = attributes.toArray(new Attribute[attributes.size()]);
        AttributeDataset data = new AttributeDataset(relationName, remaining, response);

        // Consume instances until the tokenizer reports end of file.
        getFirstToken(tokenizer);
        while (tokenizer.ttype != StreamTokenizer.TT_EOF) {
            // A leading '{' marks the sparse row syntax.
            if (tokenizer.ttype == '{') {
                data.add(getSparseInstance(tokenizer, declared));
            } else {
                data.add(getInstance(tokenizer, declared));
            }
            getFirstToken(tokenizer);
        }

        // Mark nominal and string attributes as closed now that all rows are read.
        // NOTE(review): only the attributes left in the list are visited, so a
        // response attribute removed above is not closed here — confirm intended.
        for (Attribute candidate : attributes) {
            if (candidate instanceof NominalAttribute) {
                ((NominalAttribute) candidate).setOpen(false);
            }
            if (candidate instanceof StringAttribute) {
                ((StringAttribute) candidate).setOpen(false);
            }
        }

        return data;
    }
}
Also used : AttributeDataset(smile.data.AttributeDataset) InputStreamReader(java.io.InputStreamReader) Attribute(smile.data.Attribute) NominalAttribute(smile.data.NominalAttribute) NumericAttribute(smile.data.NumericAttribute) DateAttribute(smile.data.DateAttribute) StringAttribute(smile.data.StringAttribute) ArrayList(java.util.ArrayList) StringAttribute(smile.data.StringAttribute) Reader(java.io.Reader) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) IOException(java.io.IOException) NominalAttribute(smile.data.NominalAttribute) BufferedReader(java.io.BufferedReader) StreamTokenizer(java.io.StreamTokenizer)

Example 2 with StringAttribute

use of smile.data.StringAttribute in project smile by haifengl.

The parseAttribute method of the class ArffParser.

/**
 * Parses a single attribute declaration from the header.
 *
 * <p>The attribute name is read first, followed by its type. Numeric
 * (real/integer/numeric), string and date types produce the corresponding
 * attribute object; a brace-delimited list of values produces a nominal
 * attribute. Relational and end-of-subrelation declarations are consumed
 * but produce no attribute.
 *
 * @param tokenizer the tokenizer positioned just before the attribute name
 * @return the parsed attribute, or {@code null} for relational and
 *         end-of-subrelation declarations
 * @throws IOException if the underlying reader fails
 * @throws ParseException if the declaration is malformed, names an unknown
 *         type, or the input ends before the declaration is complete
 */
private Attribute parseAttribute(StreamTokenizer tokenizer) throws IOException, ParseException {
    Attribute attribute = null;

    // Get attribute name.
    getNextToken(tokenizer);
    String attributeName = tokenizer.sval;
    getNextToken(tokenizer);

    // A word token names a built-in type; anything else must open a nominal enumeration.
    if (tokenizer.ttype == StreamTokenizer.TT_WORD) {
        String type = tokenizer.sval;
        if (type.equalsIgnoreCase(ARFF_ATTRIBUTE_REAL)
                || type.equalsIgnoreCase(ARFF_ATTRIBUTE_INTEGER)
                || type.equalsIgnoreCase(ARFF_ATTRIBUTE_NUMERIC)) {
            attribute = new NumericAttribute(attributeName);
            readTillEOL(tokenizer);
        } else if (type.equalsIgnoreCase(ARFF_ATTRIBUTE_STRING)) {
            attribute = new StringAttribute(attributeName);
            readTillEOL(tokenizer);
        } else if (type.equalsIgnoreCase(ARFF_ATTRIBUTE_DATE)) {
            // An optional date format may follow the type on the same line.
            String format = null;
            if (tokenizer.nextToken() != StreamTokenizer.TT_EOL) {
                if ((tokenizer.ttype != StreamTokenizer.TT_WORD) && (tokenizer.ttype != '\'') && (tokenizer.ttype != '\"')) {
                    throw new ParseException("not a valid date format", tokenizer.lineno());
                }
                format = tokenizer.sval;
                readTillEOL(tokenizer);
            } else {
                tokenizer.pushBack();
            }
            attribute = new DateAttribute(attributeName, null, format);
            // NOTE(review): when a format was present readTillEOL has already run
            // above; this second call is kept as in the original — confirm that
            // readTillEOL is safe to call twice in a row.
            readTillEOL(tokenizer);
        } else if (type.equalsIgnoreCase(ARFF_ATTRIBUTE_RELATIONAL)) {
            // Consumed but not represented: attribute stays null.
            readTillEOL(tokenizer);
        } else if (type.equalsIgnoreCase(ARFF_END_SUBRELATION)) {
            // Consumed but not represented: attribute stays null.
            getNextToken(tokenizer);
        } else {
            throw new ParseException("Invalid attribute type or invalid enumeration", tokenizer.lineno());
        }
    } else {
        // Attribute is nominal.
        List<String> attributeValues = new ArrayList<>();
        tokenizer.pushBack();

        // Get values for nominal attribute.
        if (tokenizer.nextToken() != '{') {
            throw new ParseException("{ expected at beginning of enumeration", tokenizer.lineno());
        }

        while (tokenizer.nextToken() != '}') {
            if (tokenizer.ttype == StreamTokenizer.TT_EOL) {
                throw new ParseException("} expected at end of enumeration", tokenizer.lineno());
            }
            // Without this check a truncated file would loop onto a null sval
            // and fail with a NullPointerException instead of a ParseException.
            if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
                throw new ParseException(PREMATURE_END_OF_FILE, tokenizer.lineno());
            }
            // StreamTokenizer only sets sval for word and quoted-string tokens;
            // any other token here (e.g. a stray '{') is not a valid value.
            if (tokenizer.sval == null) {
                throw new ParseException("Invalid attribute type or invalid enumeration", tokenizer.lineno());
            }
            attributeValues.add(tokenizer.sval.trim());
        }

        String[] values = attributeValues.toArray(new String[attributeValues.size()]);
        attribute = new NominalAttribute(attributeName, values);
    }

    // Finish the current line and peek at the next one; a header that ends
    // right after an attribute declaration is incomplete.
    getLastToken(tokenizer, false);
    getFirstToken(tokenizer);
    if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
        throw new ParseException(PREMATURE_END_OF_FILE, tokenizer.lineno());
    }

    return attribute;
}
Also used : NominalAttribute(smile.data.NominalAttribute) Attribute(smile.data.Attribute) NominalAttribute(smile.data.NominalAttribute) NumericAttribute(smile.data.NumericAttribute) DateAttribute(smile.data.DateAttribute) StringAttribute(smile.data.StringAttribute) StringAttribute(smile.data.StringAttribute) ArrayList(java.util.ArrayList) ParseException(java.text.ParseException) NumericAttribute(smile.data.NumericAttribute) DateAttribute(smile.data.DateAttribute)

Aggregations

ArrayList (java.util.ArrayList)2 Attribute (smile.data.Attribute)2 DateAttribute (smile.data.DateAttribute)2 NominalAttribute (smile.data.NominalAttribute)2 NumericAttribute (smile.data.NumericAttribute)2 StringAttribute (smile.data.StringAttribute)2 BufferedReader (java.io.BufferedReader)1 IOException (java.io.IOException)1 InputStreamReader (java.io.InputStreamReader)1 Reader (java.io.Reader)1 StreamTokenizer (java.io.StreamTokenizer)1 ParseException (java.text.ParseException)1 AttributeDataset (smile.data.AttributeDataset)1