Search in sources :

Example 21 with PlotCanvas

use of smile.plot.PlotCanvas in project smile by haifengl.

the class ApproximateStringSearchDemo method run.

/**
 * Benchmarks three string range-search structures (linear scan, BK-tree and
 * cover tree) under edit distance and adds bar plots of the timings to the canvas.
 *
 * <p>On the first invocation ({@code data == null}) the word list is loaded from
 * the {@code index.noun} test data file, the three structures are built and a
 * "Build Time" plot is added. Every invocation then runs range searches with
 * radius {@code knn} and adds a "Search Time" plot. Timings are plotted in
 * seconds, with fractional precision so that sub-second runs do not show as 0.
 *
 * <p>The start button and the k field are disabled while the benchmark runs and
 * re-enabled before returning normally.
 */
@Override
public void run() {
    startButton.setEnabled(false);
    knnField.setEnabled(false);
    if (data == null) {
        System.out.print("Loading dataset...");
        List<String> words = new ArrayList<>();
        // try-with-resources guarantees the file handle is released even if reading fails.
        try (FileInputStream stream = new FileInputStream(smile.data.parser.IOUtils.getTestDataFile("index.noun"));
             BufferedReader input = new BufferedReader(new InputStreamReader(stream))) {
            String line = input.readLine();
            while (line != null) {
                // Lines starting with a space are skipped; the first whitespace-delimited
                // token of every other line is taken as a word, with '_' shown as a space.
                if (!line.startsWith(" ")) {
                    String[] w = line.split("\\s");
                    words.add(w[0].replace('_', ' '));
                }
                line = input.readLine();
            }
        } catch (Exception e) {
            System.err.println(e);
        }
        System.out.println(words.size() + " words");
        if (words.isEmpty()) {
            // Nothing to index: leave data unset so that a later run retries the load,
            // and hand control back to the user instead of failing further down.
            System.err.println("No words were loaded; skipping the benchmark.");
            startButton.setEnabled(true);
            knnField.setEnabled(true);
            return;
        }
        // A zero-length seed array avoids the stray null element that new String[1]
        // would leave behind for an empty list.
        data = words.toArray(new String[0]);
        System.out.println("Building searching data structure...");
        long time = System.currentTimeMillis();
        naive = new LinearSearch<>(data, new EditDistance(50, true));
        double naiveBuild = (System.currentTimeMillis() - time) / 1000.0;
        time = System.currentTimeMillis();
        bktree = new BKTree<>(new EditDistance(50, true));
        bktree.add(data);
        double bktreeBuild = (System.currentTimeMillis() - time) / 1000.0;
        time = System.currentTimeMillis();
        cover = new CoverTree<>(data, new EditDistance(50, true));
        double coverBuild = (System.currentTimeMillis() - time) / 1000.0;
        double[] buildTime = { naiveBuild, bktreeBuild, coverBuild };
        PlotCanvas build = BarPlot.plot(buildTime, label);
        build.setTitle("Build Time");
        canvas.add(build);
        validate();
    }
    int[] perm = Math.permutate(data.length);
    // Never index past the end of the permutation when fewer than 1000 words exist.
    int searches = data.length < 1000 ? data.length : 1000;
    System.out.println("Perform " + searches + " searches...");
    long time = System.currentTimeMillis();
    List<Neighbor<String, String>> neighbors = new ArrayList<>();
    for (int i = 0; i < searches; i++) {
        naive.range(data[perm[i]], knn, neighbors);
        neighbors.clear();
    }
    double naiveSearch = (System.currentTimeMillis() - time) / 1000.0;
    time = System.currentTimeMillis();
    for (int i = 0; i < searches; i++) {
        bktree.range(data[perm[i]], knn, neighbors);
        neighbors.clear();
    }
    double bktreeSearch = (System.currentTimeMillis() - time) / 1000.0;
    time = System.currentTimeMillis();
    for (int i = 0; i < searches; i++) {
        cover.range(data[perm[i]], knn, neighbors);
        neighbors.clear();
    }
    double coverSearch = (System.currentTimeMillis() - time) / 1000.0;
    double[] searchTime = { naiveSearch, bktreeSearch, coverSearch };
    PlotCanvas search = BarPlot.plot(searchTime, label);
    search.setTitle("Search Time of k = " + knn);
    canvas.add(search);
    // Switch to a 2x2 grid once the canvas holds more than three plots.
    if (canvas.getComponentCount() > 3) {
        canvas.setLayout(new GridLayout(2, 2));
    }
    validate();
    startButton.setEnabled(true);
    knnField.setEnabled(true);
}
Also used : InputStreamReader(java.io.InputStreamReader) ArrayList(java.util.ArrayList) Neighbor(smile.neighbor.Neighbor) FileInputStream(java.io.FileInputStream) GridLayout(java.awt.GridLayout) BufferedReader(java.io.BufferedReader) EditDistance(smile.math.distance.EditDistance) PlotCanvas(smile.plot.PlotCanvas)

Example 22 with PlotCanvas

use of smile.plot.PlotCanvas in project smile by haifengl.

the class KNNDemo method run.

/**
 * Benchmarks k-nearest-neighbor search structures on uniformly random data and
 * plots their build and search times (in milliseconds).
 *
 * <p>The dataset size is {@code 10^logN} points of {@code dimension} coordinates,
 * both read from the sliders. Linear search, KD-tree and cover tree are timed
 * first; the linear search answers serve as ground truth and their mean neighbor
 * distance is used to size the LSH and MPLSH hash functions. The recall of LSH
 * and MPLSH against the ground truth is printed to standard output.
 *
 * <p>All input controls are disabled while the benchmark runs and re-enabled
 * before returning normally.
 */
@Override
public void run() {
    startButton.setEnabled(false);
    logNSlider.setEnabled(false);
    dimensionSlider.setEnabled(false);
    knnField.setEnabled(false);
    logN = logNSlider.getValue();
    dimension = dimensionSlider.getValue();
    System.out.println("Generating dataset...");
    int n = (int) Math.pow(10, logN);
    double[][] data = new double[n][];
    for (int i = 0; i < n; i++) {
        data[i] = new double[dimension];
        for (int j = 0; j < dimension; j++) {
            data[i][j] = Math.random();
        }
    }
    // Queries are the first 1000 entries of a random permutation of the data.
    int[] perm = Math.permutate(n);
    System.out.println("Building searching data structure...");
    long time = System.currentTimeMillis();
    LinearSearch<double[]> naive = new LinearSearch<>(data, new EuclideanDistance());
    int naiveBuild = (int) (System.currentTimeMillis() - time);
    time = System.currentTimeMillis();
    KDTree<double[]> kdtree = new KDTree<>(data, data);
    int kdtreeBuild = (int) (System.currentTimeMillis() - time);
    time = System.currentTimeMillis();
    CoverTree<double[]> cover = new CoverTree<>(data, new EuclideanDistance());
    int coverBuild = (int) (System.currentTimeMillis() - time);
    System.out.println("Perform 1000 searches...");
    double radius = 0.0;
    List<Neighbor<double[], double[]>[]> answers = new ArrayList<>(1000);
    time = System.currentTimeMillis();
    for (int i = 0; i < 1000; i++) {
        answers.add(naive.knn(data[perm[i]], knn));
        for (int j = 0; j < answers.get(i).length; j++) {
            radius += answers.get(i)[j].distance;
        }
    }
    int naiveSearch = (int) (System.currentTimeMillis() - time);
    // Mean distance to a returned neighbor, used below to scale the hash width.
    radius /= 1000 * knn;
    time = System.currentTimeMillis();
    for (int i = 0; i < 1000; i++) {
        kdtree.knn(data[perm[i]], knn);
    }
    int kdtreeSearch = (int) (System.currentTimeMillis() - time);
    time = System.currentTimeMillis();
    for (int i = 0; i < 1000; i++) {
        cover.knn(data[perm[i]], knn);
    }
    int coverSearch = (int) (System.currentTimeMillis() - time);
    time = System.currentTimeMillis();
    LSH<double[]> lsh = new LSH<>(dimension, 5, (int) Math.log2(dimension), 4 * radius, 1017881);
    for (int i = 0; i < n; i++) {
        lsh.put(data[i], data[i]);
    }
    int lshBuild = (int) (System.currentTimeMillis() - time);
    time = System.currentTimeMillis();
    MPLSH<double[]> mplsh = new MPLSH<>(dimension, 2, (int) Math.log2(n), 4 * radius, 1017881);
    for (int i = 0; i < n; i++) {
        mplsh.put(data[i], data[i]);
    }
    double[][] train = new double[1000][];
    for (int i = 0; i < train.length; i++) {
        train[i] = data[perm[i]];
    }
    mplsh.learn(kdtree, train, 1.5 * radius);
    // The MPLSH build time deliberately includes the learning step above.
    int mplshBuild = (int) (System.currentTimeMillis() - time);
    double lshRecall = 0.0;
    time = System.currentTimeMillis();
    for (int i = 0; i < 1000; i++) {
        Neighbor<double[], double[]>[] neighbors = lsh.knn(data[perm[i]], knn);
        lshRecall += 1.0 * countHits(answers.get(i), neighbors, knn) / knn;
    }
    int lshSearch = (int) (System.currentTimeMillis() - time);
    lshRecall /= 1000;
    System.out.format("The recall of LSH is %.1f%%\n", lshRecall * 100);
    double mplshRecall = 0.0;
    time = System.currentTimeMillis();
    for (int i = 0; i < 1000; i++) {
        Neighbor<double[], double[]>[] neighbors = mplsh.knn(data[perm[i]], knn, 0.95, 10);
        mplshRecall += 1.0 * countHits(answers.get(i), neighbors, knn) / knn;
    }
    int mplshSearch = (int) (System.currentTimeMillis() - time);
    mplshRecall /= 1000;
    System.out.format("The recall of MPLSH is %.1f%%\n", mplshRecall * 100);
    canvas.removeAll();
    double[] buildTime = { naiveBuild, kdtreeBuild, coverBuild, lshBuild, mplshBuild };
    PlotCanvas build = BarPlot.plot(buildTime, label);
    build.setTitle("Build Time");
    canvas.add(build);
    double[] searchTime = { naiveSearch, kdtreeSearch, coverSearch, lshSearch, mplshSearch };
    PlotCanvas search = BarPlot.plot(searchTime, label);
    search.setTitle("Search Time");
    canvas.add(search);
    validate();
    startButton.setEnabled(true);
    logNSlider.setEnabled(true);
    dimensionSlider.setEnabled(true);
    knnField.setEnabled(true);
}

/**
 * Counts how many of the expected neighbors also appear among the found ones,
 * matching entries by their {@code index}.
 *
 * <p>Only the first {@code k} entries of each array are considered, and each
 * array is read up to its first {@code null} entry or its end, whichever comes
 * first, so a result shorter than {@code k} does not cause an out-of-bounds access.
 *
 * @param expected the ground-truth neighbors
 * @param found the neighbors returned by the structure under test
 * @param k the maximum number of entries to inspect in each array
 * @return the number of expected neighbors present in {@code found}
 */
private static int countHits(Neighbor<double[], double[]>[] expected, Neighbor<double[], double[]>[] found, int k) {
    int hit = 0;
    for (int p = 0; p < k && p < expected.length && expected[p] != null; p++) {
        for (int q = 0; q < k && q < found.length && found[q] != null; q++) {
            if (expected[p].index == found[q].index) {
                hit++;
                break;
            }
        }
    }
    return hit;
}
Also used : MPLSH(smile.neighbor.MPLSH) LSH(smile.neighbor.LSH) CoverTree(smile.neighbor.CoverTree) ArrayList(java.util.ArrayList) Neighbor(smile.neighbor.Neighbor) EuclideanDistance(smile.math.distance.EuclideanDistance) KDTree(smile.neighbor.KDTree) LinearSearch(smile.neighbor.LinearSearch) PlotCanvas(smile.plot.PlotCanvas)

Example 23 with PlotCanvas

use of smile.plot.PlotCanvas in project smile by haifengl.

the class GMeansDemo method learn.

/**
 * Runs G-Means on the currently selected dataset and returns a scatter plot of
 * the result.
 *
 * <p>The maximum number of clusters is parsed from the text field. If it is not
 * an integer or is smaller than 2, an error dialog is shown and {@code null} is
 * returned. Otherwise each non-empty cluster is drawn in its own palette color
 * (colors repeat when there are more clusters than colors) and the centroids are
 * marked with {@code '@'}.
 *
 * @return the plot of the clustered data, or {@code null} on invalid input
 */
@Override
public JComponent learn() {
    try {
        maxClusterNumber = Integer.parseInt(maxClusterNumberField.getText().trim());
        if (maxClusterNumber < 2) {
            JOptionPane.showMessageDialog(this, "Invalid Max K: " + maxClusterNumber, "Error", JOptionPane.ERROR_MESSAGE);
            return null;
        }
    } catch (NumberFormatException e) {
        // Only a malformed number is a user input error; anything else should surface.
        JOptionPane.showMessageDialog(this, "Invalid Max K: " + maxClusterNumberField.getText(), "Error", JOptionPane.ERROR_MESSAGE);
        return null;
    }
    double[][] points = dataset[datasetIndex];
    long clock = System.currentTimeMillis();
    GMeans gmeans = new GMeans(points, maxClusterNumber);
    System.out.format("G-Means clusterings %d samples in %dms\n", points.length, System.currentTimeMillis() - clock);
    PlotCanvas plot = ScatterPlot.plot(gmeans.centroids(), '@');
    // Fetch the clustering result once instead of on every loop iteration.
    int[] sizes = gmeans.getClusterSize();
    int[] labels = gmeans.getClusterLabel();
    int clusters = gmeans.getNumClusters();
    for (int k = 0; k < clusters; k++) {
        if (sizes[k] > 0) {
            double[][] cluster = new double[sizes[k]][];
            for (int i = 0, j = 0; i < points.length; i++) {
                if (labels[i] == k) {
                    cluster[j++] = points[i];
                }
            }
            plot.points(cluster, pointLegend, Palette.COLORS[k % Palette.COLORS.length]);
        }
    }
    // Add the centroids again after the cluster points have been added.
    plot.points(gmeans.centroids(), '@');
    return plot;
}
Also used : GMeans(smile.clustering.GMeans) PlotCanvas(smile.plot.PlotCanvas)

Example 24 with PlotCanvas

use of smile.plot.PlotCanvas in project smile by haifengl.

the class HierarchicalClusteringDemo method learn.

/**
 * Performs agglomerative hierarchical clustering on the currently selected
 * dataset with the linkage chosen in the combo box, and returns a panel holding
 * a scatter plot colored by cluster next to the dendrogram.
 *
 * @return a panel containing the "Data" scatter plot and the "Dendrogram" plot
 * @throws IllegalStateException if the selected linkage index is not 0 to 6
 */
@Override
public JComponent learn() {
    long start = System.currentTimeMillis();
    double[][] points = dataset[datasetIndex];
    int count = points.length;

    // Lower-triangular pairwise distances: row r holds r + 1 slots, of which the
    // first r are filled and the diagonal slot keeps its default of zero.
    double[][] proximity = new double[count][];
    for (int row = 0; row < count; row++) {
        double[] distances = new double[row + 1];
        for (int col = 0; col < row; col++) {
            distances[col] = Math.distance(points[row], points[col]);
        }
        proximity[row] = distances;
    }

    HierarchicalClustering hac;
    int linkage = linkageBox.getSelectedIndex();
    switch (linkage) {
        case 0:
            hac = new HierarchicalClustering(new SingleLinkage(proximity));
            break;
        case 1:
            hac = new HierarchicalClustering(new CompleteLinkage(proximity));
            break;
        case 2:
            hac = new HierarchicalClustering(new UPGMALinkage(proximity));
            break;
        case 3:
            hac = new HierarchicalClustering(new WPGMALinkage(proximity));
            break;
        case 4:
            hac = new HierarchicalClustering(new UPGMCLinkage(proximity));
            break;
        case 5:
            hac = new HierarchicalClustering(new WPGMCLinkage(proximity));
            break;
        case 6:
            hac = new HierarchicalClustering(new WardLinkage(proximity));
            break;
        default:
            throw new IllegalStateException("Unsupported Linkage");
    }
    long elapsed = System.currentTimeMillis() - start;
    System.out.format("Hierarchical clusterings %d samples in %dms\n", count, elapsed);

    // Cut the tree into clusterNumber groups and tally the members of each group.
    int[] membership = hac.partition(clusterNumber);
    int[] clusterSize = new int[clusterNumber];
    for (int group : membership) {
        clusterSize[group]++;
    }

    JPanel pane = new JPanel(new GridLayout(1, 3));

    PlotCanvas scatter = ScatterPlot.plot(points, pointLegend);
    scatter.setTitle("Data");
    pane.add(scatter);
    for (int k = 0; k < clusterNumber; k++) {
        double[][] members = new double[clusterSize[k]][];
        int next = 0;
        for (int i = 0; i < count; i++) {
            if (membership[i] == k) {
                members[next] = points[i];
                next++;
            }
        }
        scatter.points(members, pointLegend, Palette.COLORS[k % Palette.COLORS.length]);
    }

    PlotCanvas dendrogram = Dendrogram.plot("Dendrogram", hac.getTree(), hac.getHeight());
    dendrogram.setTitle("Dendrogram");
    pane.add(dendrogram);
    return pane;
}
Also used : WPGMCLinkage(smile.clustering.linkage.WPGMCLinkage) JPanel(javax.swing.JPanel) CompleteLinkage(smile.clustering.linkage.CompleteLinkage) WardLinkage(smile.clustering.linkage.WardLinkage) HierarchicalClustering(smile.clustering.HierarchicalClustering) GridLayout(java.awt.GridLayout) SingleLinkage(smile.clustering.linkage.SingleLinkage) WPGMALinkage(smile.clustering.linkage.WPGMALinkage) UPGMALinkage(smile.clustering.linkage.UPGMALinkage) UPGMCLinkage(smile.clustering.linkage.UPGMCLinkage) PlotCanvas(smile.plot.PlotCanvas)

Example 25 with PlotCanvas

use of smile.plot.PlotCanvas in project smile by haifengl.

the class KMeansDemo method learn.

/**
 * Clusters the currently selected dataset with K-Means and returns a scatter
 * plot of the samples colored by cluster label, with the centroids marked by
 * {@code '@'}. The elapsed clustering time is printed to standard output.
 *
 * @return the plot of the clustered data and its centroids
 */
@Override
public JComponent learn() {
    double[][] points = dataset[datasetIndex];

    long start = System.currentTimeMillis();
    KMeans model = new KMeans(points, clusterNumber, 100, 4);
    long elapsed = System.currentTimeMillis() - start;
    System.out.format("K-Means clusterings %d samples in %dms\n", points.length, elapsed);

    int[] labels = model.getClusterLabel();
    PlotCanvas canvas = ScatterPlot.plot(points, labels, pointLegend, Palette.COLORS);
    canvas.points(model.centroids(), '@');
    return canvas;
}
Also used : KMeans(smile.clustering.KMeans) PlotCanvas(smile.plot.PlotCanvas)

Aggregations

PlotCanvas (smile.plot.PlotCanvas) 36, GridLayout (java.awt.GridLayout) 16, JPanel (javax.swing.JPanel) 15, EuclideanDistance (smile.math.distance.EuclideanDistance) 6, ArrayList (java.util.ArrayList) 4, PCA (smile.projection.PCA) 4, BorderLayout (java.awt.BorderLayout) 3, Attribute (smile.data.Attribute) 3, Graph (smile.graph.Graph) 3, CoverTree (smile.neighbor.CoverTree) 3, KDTree (smile.neighbor.KDTree) 3, LSH (smile.neighbor.LSH) 3, LinearSearch (smile.neighbor.LinearSearch) 3, MPLSH (smile.neighbor.MPLSH) 3, Neighbor (smile.neighbor.Neighbor) 3, JFrame (javax.swing.JFrame) 2, NominalAttribute (smile.data.NominalAttribute) 2, DelimitedTextParser (smile.data.parser.DelimitedTextParser) 2, IsotonicMDS (smile.mds.IsotonicMDS) 2, MDS (smile.mds.MDS) 2