Search in sources :

Example 26 with NominalAttribute

use of smile.data.NominalAttribute in project smile by haifengl.

the class NumericAttributeFeatureTest method testLOGARITHM.

/**
 * Test of f method, of class NumericAttributeFeature, with LOGARITHM scaling.
 *
 * Loads the USPS training set, adds 2.0 to every value, and checks that
 * {@code f(x[i], j)} equals {@code Math.log(x[i][j])} within 1E-7 for each of
 * the 256 expected attributes. Any exception raised while loading or
 * transforming the data fails the test instead of being printed and ignored.
 */
@Test
public void testLOGARITHM() {
    System.out.println("LOGARITHM");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset data = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        double[][] x = data.toArray(new double[data.size()][]);
        // Shift every value by 2.0 before taking logarithms
        // (presumably so that all inputs are strictly positive -- confirm against the data range).
        for (double[] row : x) {
            for (int j = 0; j < row.length; j++) {
                row[j] += 2.0;
            }
        }
        NumericAttributeFeature naf = new NumericAttributeFeature(data.attributes(), NumericAttributeFeature.Scaling.LOGARITHM);
        Attribute[] attributes = naf.attributes();
        assertEquals(256, attributes.length);
        for (int i = 0; i < x.length; i++) {
            for (int j = 0; j < attributes.length; j++) {
                assertEquals(Math.log(x[i][j]), naf.f(x[i], j), 1E-7);
            }
        }
    } catch (Exception ex) {
        // A swallowed exception would let the test pass without verifying anything.
        throw new AssertionError("testLOGARITHM failed with an unexpected exception", ex);
    }
}
Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) AttributeDataset(smile.data.AttributeDataset) NominalAttribute(smile.data.NominalAttribute) Attribute(smile.data.Attribute) Test(org.junit.Test)

Example 27 with NominalAttribute

use of smile.data.NominalAttribute in project smile by haifengl.

the class SumSquaresRatioTest method testLearn.

/**
 * Test of learn method, of class SumSquaresRatio.
 *
 * Ranks the USPS features with {@code SumSquaresRatio.rank}, keeps the 135
 * columns found at the end of the permutation returned by
 * {@code QuickSort.sort(score)}, trains an LDA classifier on the reduced
 * training set and prints its accuracy on the reduced test set. Any exception
 * raised along the way fails the test instead of being printed and ignored.
 */
@Test
public void testLearn() {
    System.out.println("USPS");
    try {
        DelimitedTextParser parser = new DelimitedTextParser();
        parser.setResponseIndex(new NominalAttribute("class"), 0);
        AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));
        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);
        double[][] testx = test.toArray(new double[test.size()][]);
        int[] testy = test.toArray(new int[test.size()]);
        SumSquaresRatio ssr = new SumSquaresRatio();
        double[] score = ssr.rank(x, y);
        int[] index = QuickSort.sort(score);
        int p = 135;
        int n = x.length;
        // Walk the permutation from its last entry backwards, derived from the
        // actual number of features rather than a hard-coded 255
        // (presumably highest score first, assuming an ascending sort -- confirm).
        int last = index.length - 1;
        double[][] xx = new double[n][p];
        for (int j = 0; j < p; j++) {
            int column = index[last - j];
            for (int i = 0; i < n; i++) {
                xx[i][j] = x[i][column];
            }
        }
        int testn = testx.length;
        double[][] testxx = new double[testn][p];
        for (int j = 0; j < p; j++) {
            int column = index[last - j];
            for (int i = 0; i < testn; i++) {
                testxx[i][j] = testx[i][column];
            }
        }
        LDA lda = new LDA(xx, y);
        int[] prediction = new int[testn];
        for (int i = 0; i < testn; i++) {
            prediction[i] = lda.predict(testxx[i]);
        }
        double accuracy = new Accuracy().measure(testy, prediction);
        System.out.format("SSR %.2f%%%n", 100 * accuracy);
    } catch (Exception ex) {
        // A swallowed exception would let the test pass without exercising anything.
        throw new AssertionError("testLearn failed with an unexpected exception", ex);
    }
}
Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) AttributeDataset(smile.data.AttributeDataset) Accuracy(smile.validation.Accuracy) NominalAttribute(smile.data.NominalAttribute) LDA(smile.classification.LDA) Test(org.junit.Test)

Example 28 with NominalAttribute

use of smile.data.NominalAttribute in project smile by haifengl.

the class CRFTest method load.

/**
 * Loads a sequence dataset from a test data resource.
 *
 * The first line is split on spaces; its second and third fields are stored as
 * {@code dataset.k} and {@code dataset.p}. Every following line is read as
 * {@code seqid pos len v1 ... vlen label}: the {@code len} values are converted
 * with the corresponding attribute's {@code valueOf}, and consecutive lines
 * sharing a {@code seqid} form one sequence.
 *
 * I/O failures (including an empty resource) are reported on stderr and the
 * data collected so far is returned. A value that fails to parse is reported
 * on stderr and left as 0.0 in the feature vector.
 *
 * @param resource   name of the test data resource to read
 * @param attributes attributes used to parse the values; when {@code null},
 *                   nominal attributes "Attr1".."AttrN" are created from the
 *                   length field of the first data line
 * @return the populated dataset
 */
Dataset load(String resource, Attribute[] attributes) {
    int p = 0;
    int k = 0;
    Dataset dataset = new Dataset();
    dataset.attributes = attributes;
    ArrayList<double[][]> x = new ArrayList<>();
    ArrayList<int[]> y = new ArrayList<>();
    ArrayList<double[]> seq = new ArrayList<>();
    ArrayList<Integer> label = new ArrayList<>();
    int id = 1;
    try (BufferedReader input = smile.data.parser.IOUtils.getTestDataReader(resource)) {
        String header = input.readLine();
        if (header == null) {
            // readLine() returns null on an empty resource; report it instead of hitting an NPE.
            throw new IOException("Empty data resource: " + resource);
        }
        // words[0] of the header (presumably the sequence count) is not needed:
        // sequence boundaries are detected from changes of seqid below.
        String[] words = header.split(" ");
        k = Integer.parseInt(words[1]);
        p = Integer.parseInt(words[2]);
        String line;
        while ((line = input.readLine()) != null) {
            words = line.split(" ");
            int seqid = Integer.parseInt(words[0]);
            // words[1] (the position field) is not used.
            int len = Integer.parseInt(words[2]);
            if (dataset.attributes == null) {
                dataset.attributes = new Attribute[len];
                for (int i = 0; i < len; i++) {
                    dataset.attributes[i] = new NominalAttribute("Attr" + (i + 1));
                }
            }
            double[] feature = new double[len];
            for (int i = 0; i < len; i++) {
                try {
                    feature[i] = dataset.attributes[i].valueOf(words[i + 3]);
                } catch (ParseException ex) {
                    // Best effort: the value stays 0.0 and loading continues.
                    System.err.println(ex);
                }
            }
            if (seqid != id) {
                // A new sequence starts: store the one accumulated so far.
                id = seqid;
                flushSequence(seq, label, x, y);
            }
            seq.add(feature);
            label.add(Integer.parseInt(words[len + 3]));
        }
        // Store the final sequence.
        flushSequence(seq, label, x, y);
    } catch (IOException ex) {
        System.err.println(ex);
    }
    dataset.p = p;
    dataset.k = k;
    dataset.x = new double[x.size()][][];
    dataset.y = new int[y.size()][];
    for (int i = 0; i < dataset.x.length; i++) {
        dataset.x[i] = x.get(i);
        dataset.y[i] = y.get(i);
    }
    return dataset;
}

/**
 * Moves the accumulated sequence into the output lists and clears the
 * accumulators. Does nothing when no observation has been accumulated, so no
 * empty sequence is emitted.
 */
private static void flushSequence(ArrayList<double[]> seq, ArrayList<Integer> label, ArrayList<double[][]> x, ArrayList<int[]> y) {
    if (seq.isEmpty()) {
        return;
    }
    double[][] xx = new double[seq.size()][];
    int[] yy = new int[seq.size()];
    for (int i = 0; i < xx.length; i++) {
        xx[i] = seq.get(i);
        yy[i] = label.get(i);
    }
    x.add(xx);
    y.add(yy);
    seq.clear();
    label.clear();
}
Also used : ArrayList(java.util.ArrayList) IOException(java.io.IOException) NominalAttribute(smile.data.NominalAttribute) BufferedReader(java.io.BufferedReader) ParseException(java.text.ParseException)

Example 29 with NominalAttribute

use of smile.data.NominalAttribute in project smile by haifengl.

the class ValidationTest method testTest_3args_1.

/**
 * Test of test method, of class Validation.
 *
 * Trains an LDA classifier on the USPS training set and checks that
 * {@code Validation.test} reports an accuracy of 0.8724 (within 1E-4) on the
 * USPS test set. Any exception raised while loading data or training fails
 * the test instead of being printed and ignored.
 */
@Test
public void testTest_3args_1() {
    System.out.println("test");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
        AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
        AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));
        double[][] x = train.toArray(new double[train.size()][]);
        int[] y = train.toArray(new int[train.size()]);
        double[][] testx = test.toArray(new double[test.size()][]);
        int[] testy = test.toArray(new int[test.size()]);
        LDA lda = new LDA(x, y);
        double accuracy = Validation.test(lda, testx, testy);
        System.out.println("accuracy = " + accuracy);
        assertEquals(0.8724, accuracy, 1E-4);
    } catch (Exception ex) {
        // A swallowed exception would let the test pass without reaching the assertion.
        throw new AssertionError("testTest_3args_1 failed with an unexpected exception", ex);
    }
}
Also used : DelimitedTextParser(smile.data.parser.DelimitedTextParser) AttributeDataset(smile.data.AttributeDataset) NominalAttribute(smile.data.NominalAttribute) LDA(smile.classification.LDA) Test(org.junit.Test)

Example 30 with NominalAttribute

use of smile.data.NominalAttribute in project smile by haifengl.

the class ArffParser method parseAttribute.

/**
 * Parses one attribute declaration.
 *
 * The next token is read as the attribute name and the token after it decides
 * the kind of attribute: a word token selects a numeric, string or date
 * attribute (or a relational / end-of-subrelation marker, for which no
 * attribute is created), while any other token is expected to open a
 * brace-delimited enumeration of nominal values.
 *
 * @param tokenizer the tokenizer to read the declaration from
 * @return the parsed attribute, or {@code null} when the declaration is a
 *         relational or end-of-subrelation marker
 * @throws IOException    if reading from the tokenizer fails
 * @throws ParseException if the declaration is malformed or the input ends
 *                        prematurely
 */
private Attribute parseAttribute(StreamTokenizer tokenizer) throws IOException, ParseException {
    Attribute attribute = null;
    // Get attribute name.
    getNextToken(tokenizer);
    String attributeName = tokenizer.sval;
    getNextToken(tokenizer);
    // A word token names a built-in type; anything else must start an enumeration.
    if (tokenizer.ttype == StreamTokenizer.TT_WORD) {
        // Attribute is real, integer, string, date, or a relational marker.
        if (tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_REAL) || tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_INTEGER) || tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_NUMERIC)) {
            attribute = new NumericAttribute(attributeName);
            readTillEOL(tokenizer);
        } else if (tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_STRING)) {
            attribute = new StringAttribute(attributeName);
            readTillEOL(tokenizer);
        } else if (tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_DATE)) {
            // An optional date format (bare word or quoted string) may follow the type.
            String format = null;
            if (tokenizer.nextToken() != StreamTokenizer.TT_EOL) {
                if ((tokenizer.ttype != StreamTokenizer.TT_WORD) && (tokenizer.ttype != '\'') && (tokenizer.ttype != '\"')) {
                    throw new ParseException("not a valid date format", tokenizer.lineno());
                }
                format = tokenizer.sval;
                readTillEOL(tokenizer);
            } else {
                tokenizer.pushBack();
            }
            attribute = new DateAttribute(attributeName, null, format);
            // NOTE(review): readTillEOL may already have run above; presumably it is
            // safe to call twice -- confirm against its implementation.
            readTillEOL(tokenizer);
        } else if (tokenizer.sval.equalsIgnoreCase(ARFF_ATTRIBUTE_RELATIONAL)) {
            readTillEOL(tokenizer);
        } else if (tokenizer.sval.equalsIgnoreCase(ARFF_END_SUBRELATION)) {
            getNextToken(tokenizer);
        } else {
            throw new ParseException("Invalid attribute type or invalid enumeration", tokenizer.lineno());
        }
    } else {
        // Attribute is nominal.
        List<String> attributeValues = new ArrayList<>();
        tokenizer.pushBack();
        // Get values for nominal attribute.
        if (tokenizer.nextToken() != '{') {
            throw new ParseException("{ expected at beginning of enumeration", tokenizer.lineno());
        }
        while (tokenizer.nextToken() != '}') {
            if (tokenizer.ttype == StreamTokenizer.TT_EOL) {
                throw new ParseException("} expected at end of enumeration", tokenizer.lineno());
            }
            if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
                // The input ended inside the enumeration; sval is null here.
                throw new ParseException(PREMATURE_END_OF_FILE, tokenizer.lineno());
            }
            if (tokenizer.sval == null) {
                // Non-word tokens carry no string value; report them instead of failing with an NPE.
                throw new ParseException("Invalid value in enumeration", tokenizer.lineno());
            }
            attributeValues.add(tokenizer.sval.trim());
        }
        attribute = new NominalAttribute(attributeName, attributeValues.toArray(new String[0]));
    }
    getLastToken(tokenizer, false);
    getFirstToken(tokenizer);
    if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
        throw new ParseException(PREMATURE_END_OF_FILE, tokenizer.lineno());
    }
    return attribute;
}
Also used : NominalAttribute(smile.data.NominalAttribute) Attribute(smile.data.Attribute) NumericAttribute(smile.data.NumericAttribute) DateAttribute(smile.data.DateAttribute) StringAttribute(smile.data.StringAttribute) ArrayList(java.util.ArrayList) ParseException(java.text.ParseException)

Aggregations

NominalAttribute (smile.data.NominalAttribute): 54, DelimitedTextParser (smile.data.parser.DelimitedTextParser): 49, AttributeDataset (smile.data.AttributeDataset): 48, Test (org.junit.Test): 46, AdjustedRandIndex (smile.validation.AdjustedRandIndex): 14, RandIndex (smile.validation.RandIndex): 14, Attribute (smile.data.Attribute): 12, ArrayList (java.util.ArrayList): 7, EuclideanDistance (smile.math.distance.EuclideanDistance): 5, IOException (java.io.IOException): 4, BufferedReader (java.io.BufferedReader): 3, ParseException (java.text.ParseException): 3, LDA (smile.classification.LDA): 3, InputStreamReader (java.io.InputStreamReader): 2, DateAttribute (smile.data.DateAttribute): 2, NumericAttribute (smile.data.NumericAttribute): 2, StringAttribute (smile.data.StringAttribute): 2, PlotCanvas (smile.plot.PlotCanvas): 2, Accuracy (smile.validation.Accuracy): 2, BorderLayout (java.awt.BorderLayout): 1