Search in sources :

Example 26 with EuclideanDistance

use of smile.math.distance.EuclideanDistance in project smile by haifengl.

the class KNNDemo method run.

/**
 * Benchmarks k-nearest-neighbor search on a freshly generated random dataset.
 *
 * <p>The method reads the dataset size exponent and the dimension from the
 * sliders, fills {@code 10^logN} points with {@code Math.random()} values,
 * builds a linear search, a KD-tree, a cover tree, an LSH and an MPLSH index,
 * times the build and the search phase of each, prints the recall of the two
 * hashing indexes against the linear-search answers, and finally shows the
 * build and search times as two bar plots on {@code canvas}.
 *
 * <p>The input controls are disabled while the benchmark runs and are always
 * re-enabled afterwards, even when the benchmark fails with an exception.
 */
@Override
public void run() {
    startButton.setEnabled(false);
    logNSlider.setEnabled(false);
    dimensionSlider.setEnabled(false);
    knnField.setEnabled(false);
    try {
        logN = logNSlider.getValue();
        dimension = dimensionSlider.getValue();
        System.out.println("Generating dataset...");
        int n = (int) Math.pow(10, logN);
        double[][] data = new double[n][];
        for (int i = 0; i < n; i++) {
            data[i] = new double[dimension];
            for (int j = 0; j < dimension; j++) {
                data[i][j] = Math.random();
            }
        }
        int[] perm = Math.permutate(n);
        // Query points are data[perm[0 .. queries - 1]]. Cap the count at n so a
        // dataset with fewer than 1000 samples cannot be indexed out of range
        // (assumes Math.permutate(n) returns n indices - TODO confirm).
        int queries = n < 1000 ? n : 1000;

        System.out.println("Building searching data structure...");
        long time = System.currentTimeMillis();
        LinearSearch<double[]> naive = new LinearSearch<>(data, new EuclideanDistance());
        int naiveBuild = (int) (System.currentTimeMillis() - time);
        time = System.currentTimeMillis();
        KDTree<double[]> kdtree = new KDTree<>(data, data);
        int kdtreeBuild = (int) (System.currentTimeMillis() - time);
        time = System.currentTimeMillis();
        CoverTree<double[]> cover = new CoverTree<>(data, new EuclideanDistance());
        int coverBuild = (int) (System.currentTimeMillis() - time);

        // Prints "Perform 1000 searches..." whenever the dataset is large enough.
        System.out.println("Perform " + queries + " searches...");
        double radius = 0.0;
        List<Neighbor<double[], double[]>[]> answers = new ArrayList<>(queries);
        time = System.currentTimeMillis();
        for (int i = 0; i < queries; i++) {
            // The linear-search results double as the ground truth for the recall below.
            answers.add(naive.knn(data[perm[i]], knn));
            for (int j = 0; j < answers.get(i).length; j++) {
                radius += answers.get(i)[j].distance;
            }
        }
        int naiveSearch = (int) (System.currentTimeMillis() - time);
        // Average neighbor distance; it scales the radius arguments of LSH and MPLSH.
        radius /= queries * knn;

        time = System.currentTimeMillis();
        for (int i = 0; i < queries; i++) {
            kdtree.knn(data[perm[i]], knn);
        }
        int kdtreeSearch = (int) (System.currentTimeMillis() - time);
        time = System.currentTimeMillis();
        for (int i = 0; i < queries; i++) {
            cover.knn(data[perm[i]], knn);
        }
        int coverSearch = (int) (System.currentTimeMillis() - time);

        // NOTE(review): the literal arguments (5, 2, 1017881, ...) are passed through
        // unchanged; confirm their meaning against the LSH / MPLSH constructors.
        time = System.currentTimeMillis();
        LSH<double[]> lsh = new LSH<>(dimension, 5, (int) Math.log2(dimension), 4 * radius, 1017881);
        for (int i = 0; i < n; i++) {
            lsh.put(data[i], data[i]);
        }
        int lshBuild = (int) (System.currentTimeMillis() - time);
        time = System.currentTimeMillis();
        MPLSH<double[]> mplsh = new MPLSH<>(dimension, 2, (int) Math.log2(n), 4 * radius, 1017881);
        for (int i = 0; i < n; i++) {
            mplsh.put(data[i], data[i]);
        }
        double[][] train = new double[queries][];
        for (int i = 0; i < train.length; i++) {
            train[i] = data[perm[i]];
        }
        mplsh.learn(kdtree, train, 1.5 * radius);
        int mplshBuild = (int) (System.currentTimeMillis() - time);

        double lshRecall = 0.0;
        time = System.currentTimeMillis();
        for (int i = 0; i < queries; i++) {
            Neighbor<double[], double[]>[] neighbors = lsh.knn(data[perm[i]], knn);
            lshRecall += 1.0 * countHits(answers.get(i), neighbors, knn) / knn;
        }
        int lshSearch = (int) (System.currentTimeMillis() - time);
        lshRecall /= queries;
        System.out.format("The recall of LSH is %.1f%%\n", lshRecall * 100);

        double mplshRecall = 0.0;
        time = System.currentTimeMillis();
        for (int i = 0; i < queries; i++) {
            Neighbor<double[], double[]>[] neighbors = mplsh.knn(data[perm[i]], knn, 0.95, 10);
            mplshRecall += 1.0 * countHits(answers.get(i), neighbors, knn) / knn;
        }
        int mplshSearch = (int) (System.currentTimeMillis() - time);
        mplshRecall /= queries;
        System.out.format("The recall of MPLSH is %.1f%%\n", mplshRecall * 100);

        canvas.removeAll();
        double[] buildTime = { naiveBuild, kdtreeBuild, coverBuild, lshBuild, mplshBuild };
        PlotCanvas build = BarPlot.plot(buildTime, label);
        build.setTitle("Build Time");
        canvas.add(build);
        double[] searchTime = { naiveSearch, kdtreeSearch, coverSearch, lshSearch, mplshSearch };
        PlotCanvas search = BarPlot.plot(searchTime, label);
        search.setTitle("Search Time");
        canvas.add(search);
        validate();
    } finally {
        // Re-enable the controls on every exit path so a failed run does not
        // leave the panel unusable.
        startButton.setEnabled(true);
        logNSlider.setEnabled(true);
        dimensionSlider.setEnabled(true);
        knnField.setEnabled(true);
    }
}

/**
 * Counts how many of the first {@code k} expected neighbors also appear among
 * the first {@code k} found neighbors, comparing them by {@code index}.
 *
 * <p>Scanning of either array stops at its end or at the first {@code null}
 * entry, whichever comes first, so a result array shorter than {@code k} is
 * handled without an out-of-range access.
 *
 * @param expected the reference neighbors
 * @param found the neighbors returned by the index under test
 * @param k the number of neighbors that were requested
 * @return the number of expected neighbors that were found
 */
private static int countHits(Neighbor<double[], double[]>[] expected, Neighbor<double[], double[]>[] found, int k) {
    int hit = 0;
    for (int p = 0; p < k && p < expected.length && expected[p] != null; p++) {
        for (int q = 0; q < k && q < found.length && found[q] != null; q++) {
            if (expected[p].index == found[q].index) {
                hit++;
                break;
            }
        }
    }
    return hit;
}
Also used : MPLSH(smile.neighbor.MPLSH) LSH(smile.neighbor.LSH) CoverTree(smile.neighbor.CoverTree) ArrayList(java.util.ArrayList) Neighbor(smile.neighbor.Neighbor) EuclideanDistance(smile.math.distance.EuclideanDistance) KDTree(smile.neighbor.KDTree) LinearSearch(smile.neighbor.LinearSearch) PlotCanvas(smile.plot.PlotCanvas)

Example 27 with EuclideanDistance

use of smile.math.distance.EuclideanDistance in project smile by haifengl.

the class MECDemo method learn.

/**
 * Runs MEC clustering on the currently selected dataset and plots the result.
 *
 * <p>The range is read from {@code rangeField}. Text that cannot be parsed,
 * a value that is not greater than zero, and {@code NaN} are all rejected with
 * an error dialog; in that case the {@code range} field keeps its previous
 * value and {@code null} is returned.
 *
 * @return a scatter plot with one colored point set per cluster, or
 *         {@code null} when the entered range is invalid
 */
@Override
public JComponent learn() {
    double parsedRange;
    try {
        parsedRange = Double.parseDouble(rangeField.getText().trim());
    } catch (Exception e) {
        JOptionPane.showMessageDialog(this, "Invalid range: " + rangeField.getText(), "Error", JOptionPane.ERROR_MESSAGE);
        return null;
    }
    // Double.parseDouble accepts "NaN", and NaN <= 0 is false, so NaN needs an
    // explicit check to be rejected.
    if (Double.isNaN(parsedRange) || parsedRange <= 0) {
        JOptionPane.showMessageDialog(this, "Invalid Range: " + parsedRange, "Error", JOptionPane.ERROR_MESSAGE);
        return null;
    }
    // Commit to the field only after validation so a rejected input is not stored.
    range = parsedRange;

    double[][] samples = dataset[datasetIndex];
    long clock = System.currentTimeMillis();
    MEC<double[]> mec = new MEC<>(samples, new EuclideanDistance(), clusterNumber, range);
    System.out.format("MEC clusterings %d samples in %dms\n", samples.length, System.currentTimeMillis() - clock);

    PlotCanvas plot = ScatterPlot.plot(samples, pointLegend);
    // Fetched once instead of on every loop iteration.
    int[] sizes = mec.getClusterSize();
    int[] labels = mec.getClusterLabel();
    for (int k = 0; k < mec.getNumClusters(); k++) {
        // Gather the members of cluster k so they can be drawn in one color.
        double[][] cluster = new double[sizes[k]][];
        for (int i = 0, j = 0; i < samples.length; i++) {
            if (labels[i] == k) {
                cluster[j++] = samples[i];
            }
        }
        // Colors wrap around when there are more clusters than palette entries.
        plot.points(cluster, pointLegend, Palette.COLORS[k % Palette.COLORS.length]);
    }
    return plot;
}
Also used : EuclideanDistance(smile.math.distance.EuclideanDistance) MEC(smile.clustering.MEC) PlotCanvas(smile.plot.PlotCanvas)

Example 28 with EuclideanDistance

use of smile.math.distance.EuclideanDistance in project smile by haifengl.

the class NearestNeighborDemo method run.

/**
 * Benchmarks nearest-neighbor search on a freshly generated random dataset.
 *
 * <p>The method reads the dataset size exponent and the dimension from the
 * sliders, fills {@code 10^logN} points with {@code Math.random()} values,
 * builds a linear search, a KD-tree, a cover tree, an LSH and an MPLSH index,
 * times the build and the search phase of each, prints the recall of the two
 * hashing indexes against the linear-search answers, and finally shows the
 * build and search times as two bar plots on {@code canvas}.
 *
 * <p>The input controls are disabled while the benchmark runs and are always
 * re-enabled afterwards, even when the benchmark fails with an exception.
 */
@Override
public void run() {
    startButton.setEnabled(false);
    logNSlider.setEnabled(false);
    dimensionSlider.setEnabled(false);
    try {
        logN = logNSlider.getValue();
        dimension = dimensionSlider.getValue();
        System.out.println("Generating dataset...");
        int n = (int) Math.pow(10, logN);
        double[][] data = new double[n][];
        for (int i = 0; i < n; i++) {
            data[i] = new double[dimension];
            for (int j = 0; j < dimension; j++) {
                data[i][j] = Math.random();
            }
        }
        int[] perm = Math.permutate(n);
        // Query and training points are taken from data[perm[...]]. Cap both counts
        // at n so a small dataset cannot be indexed out of range
        // (assumes Math.permutate(n) returns n indices - TODO confirm).
        int queries = n < 100 ? n : 100;
        int trainSize = n < 1000 ? n : 1000;

        System.out.println("Building searching data structure...");
        long time = System.currentTimeMillis();
        LinearSearch<double[]> naive = new LinearSearch<>(data, new EuclideanDistance());
        int naiveBuild = (int) (System.currentTimeMillis() - time);
        time = System.currentTimeMillis();
        KDTree<double[]> kdtree = new KDTree<>(data, data);
        int kdtreeBuild = (int) (System.currentTimeMillis() - time);
        time = System.currentTimeMillis();
        CoverTree<double[]> cover = new CoverTree<>(data, new EuclideanDistance());
        int coverBuild = (int) (System.currentTimeMillis() - time);

        // Prints "Perform 100 searches..." whenever the dataset is large enough.
        System.out.println("Perform " + queries + " searches...");
        int[] answer = new int[queries];
        double radius = 0.0;
        time = System.currentTimeMillis();
        for (int i = 0; i < queries; i++) {
            // The linear-search results double as the ground truth for the recall below.
            Neighbor<double[], double[]> neighbor = naive.nearest(data[perm[i]]);
            answer[i] = neighbor.index;
            radius += neighbor.distance;
        }
        int naiveSearch = (int) (System.currentTimeMillis() - time);
        // Average nearest-neighbor distance; it scales the radius arguments of LSH and MPLSH.
        radius /= queries;

        time = System.currentTimeMillis();
        for (int i = 0; i < queries; i++) {
            kdtree.nearest(data[perm[i]]);
        }
        int kdtreeSearch = (int) (System.currentTimeMillis() - time);
        time = System.currentTimeMillis();
        for (int i = 0; i < queries; i++) {
            cover.nearest(data[perm[i]]);
        }
        int coverSearch = (int) (System.currentTimeMillis() - time);

        // NOTE(review): the literal arguments (5, 1017881, ...) are passed through
        // unchanged; confirm their meaning against the LSH / MPLSH constructors.
        time = System.currentTimeMillis();
        LSH<double[]> lsh = new LSH<>(dimension, 5, (int) Math.ceil(Math.log2(dimension)), 4 * radius, 1017881);
        for (int i = 0; i < n; i++) {
            lsh.put(data[i], data[i]);
        }
        int lshBuild = (int) (System.currentTimeMillis() - time);

        double lshRecall = 0.0;
        time = System.currentTimeMillis();
        for (int i = 0; i < queries; i++) {
            Neighbor<double[], double[]> found = lsh.nearest(data[perm[i]]);
            // NOTE(review): null guard is defensive; confirm whether nearest() can
            // return null when no candidate is found. A null counts as a miss.
            if (found != null && found.index == answer[i]) {
                lshRecall++;
            }
        }
        int lshSearch = (int) (System.currentTimeMillis() - time);
        lshRecall /= queries;
        System.out.format("The recall of LSH is %.1f%%\n", lshRecall * 100);

        time = System.currentTimeMillis();
        MPLSH<double[]> mplsh = new MPLSH<>(dimension, 5, (int) Math.ceil(Math.log2(n)), 4 * radius, 1017881);
        for (int i = 0; i < n; i++) {
            mplsh.put(data[i], data[i]);
        }
        double[][] train = new double[trainSize][];
        for (int i = 0; i < train.length; i++) {
            train[i] = data[perm[i]];
        }
        mplsh.learn(kdtree, train, 1.5 * radius);
        int mplshBuild = (int) (System.currentTimeMillis() - time);

        double mplshRecall = 0.0;
        time = System.currentTimeMillis();
        for (int i = 0; i < queries; i++) {
            Neighbor<double[], double[]> found = mplsh.nearest(data[perm[i]], 0.95, 10);
            // Same defensive null guard as for LSH above.
            if (found != null && found.index == answer[i]) {
                mplshRecall++;
            }
        }
        int mplshSearch = (int) (System.currentTimeMillis() - time);
        mplshRecall /= queries;
        System.out.format("The recall of MPLSH is %.1f%%\n", mplshRecall * 100);

        canvas.removeAll();
        double[] buildTime = { naiveBuild, kdtreeBuild, coverBuild, lshBuild, mplshBuild };
        PlotCanvas build = BarPlot.plot(buildTime, label);
        build.setTitle("Build Time");
        canvas.add(build);
        double[] searchTime = { naiveSearch, kdtreeSearch, coverSearch, lshSearch, mplshSearch };
        PlotCanvas search = BarPlot.plot(searchTime, label);
        search.setTitle("Search Time");
        canvas.add(search);
        validate();
    } finally {
        // Re-enable the controls on every exit path so a failed run does not
        // leave the panel unusable.
        startButton.setEnabled(true);
        logNSlider.setEnabled(true);
        dimensionSlider.setEnabled(true);
    }
}
Also used : MPLSH(smile.neighbor.MPLSH) LSH(smile.neighbor.LSH) CoverTree(smile.neighbor.CoverTree) EuclideanDistance(smile.math.distance.EuclideanDistance) KDTree(smile.neighbor.KDTree) LinearSearch(smile.neighbor.LinearSearch) PlotCanvas(smile.plot.PlotCanvas)

Example 29 with EuclideanDistance

use of smile.math.distance.EuclideanDistance in project smile by haifengl.

the class DBScanDemo method learn.

/**
 * Runs DBSCAN clustering on the currently selected dataset and plots the result.
 *
 * <p>MinPts is read from {@code minPtsField} and the range from
 * {@code rangeField}. Text that cannot be parsed, a MinPts below 1, a range
 * that is not greater than zero, and a {@code NaN} range are all rejected with
 * an error dialog; in that case neither the {@code minPts} nor the
 * {@code range} field is modified and {@code null} is returned.
 *
 * @return a panel containing a scatter plot with one colored point set per
 *         cluster, or {@code null} when an input value is invalid
 */
@Override
public JComponent learn() {
    int parsedMinPts;
    try {
        parsedMinPts = Integer.parseInt(minPtsField.getText().trim());
    } catch (Exception e) {
        JOptionPane.showMessageDialog(this, "Invalid MinPts: " + minPtsField.getText(), "Error", JOptionPane.ERROR_MESSAGE);
        return null;
    }
    if (parsedMinPts < 1) {
        JOptionPane.showMessageDialog(this, "Invalid MinPts: " + parsedMinPts, "Error", JOptionPane.ERROR_MESSAGE);
        return null;
    }

    double parsedRange;
    try {
        parsedRange = Double.parseDouble(rangeField.getText().trim());
    } catch (Exception e) {
        JOptionPane.showMessageDialog(this, "Invalid range: " + rangeField.getText(), "Error", JOptionPane.ERROR_MESSAGE);
        return null;
    }
    // Double.parseDouble accepts "NaN", and NaN <= 0 is false, so NaN needs an
    // explicit check to be rejected.
    if (Double.isNaN(parsedRange) || parsedRange <= 0) {
        JOptionPane.showMessageDialog(this, "Invalid Range: " + parsedRange, "Error", JOptionPane.ERROR_MESSAGE);
        return null;
    }

    // Commit to the fields only after both inputs are validated so a rejected
    // input is not stored.
    minPts = parsedMinPts;
    range = parsedRange;

    double[][] samples = dataset[datasetIndex];
    long clock = System.currentTimeMillis();
    DBScan<double[]> dbscan = new DBScan<>(samples, new EuclideanDistance(), minPts, range);
    System.out.format("DBSCAN clusterings %d samples in %dms\n", samples.length, System.currentTimeMillis() - clock);

    JPanel pane = new JPanel(new GridLayout(1, 2));
    PlotCanvas plot = ScatterPlot.plot(samples, pointLegend);
    // Fetched once instead of on every loop iteration.
    int[] sizes = dbscan.getClusterSize();
    int[] labels = dbscan.getClusterLabel();
    for (int k = 0; k < dbscan.getNumClusters(); k++) {
        // Gather the members of cluster k so they can be drawn in one color.
        // Samples whose label matches no cluster index keep only the base plot styling.
        double[][] cluster = new double[sizes[k]][];
        for (int i = 0, j = 0; i < samples.length; i++) {
            if (labels[i] == k) {
                cluster[j++] = samples[i];
            }
        }
        // Colors wrap around when there are more clusters than palette entries.
        plot.points(cluster, pointLegend, Palette.COLORS[k % Palette.COLORS.length]);
    }
    pane.add(plot);
    return pane;
}
Also used : EuclideanDistance(smile.math.distance.EuclideanDistance) JPanel(javax.swing.JPanel) GridLayout(java.awt.GridLayout) DBScan(smile.clustering.DBScan) PlotCanvas(smile.plot.PlotCanvas)

Aggregations

EuclideanDistance (smile.math.distance.EuclideanDistance): 29; Test (org.junit.Test): 22; AttributeDataset (smile.data.AttributeDataset): 19; ArffParser (smile.data.parser.ArffParser): 14; RadialBasisFunction (smile.math.rbf.RadialBasisFunction): 11; RBFNetwork (smile.regression.RBFNetwork): 8; ClassifierTrainer (smile.classification.ClassifierTrainer): 6; PlotCanvas (smile.plot.PlotCanvas): 6; ArrayList (java.util.ArrayList): 5; NominalAttribute (smile.data.NominalAttribute): 5; DelimitedTextParser (smile.data.parser.DelimitedTextParser): 5; CrossValidation (smile.validation.CrossValidation): 4; CoverTree (smile.neighbor.CoverTree): 3; KDTree (smile.neighbor.KDTree): 3; LSH (smile.neighbor.LSH): 3; LinearSearch (smile.neighbor.LinearSearch): 3; MPLSH (smile.neighbor.MPLSH): 3; Neighbor (smile.neighbor.Neighbor): 2; AdjustedRandIndex (smile.validation.AdjustedRandIndex): 2; LOOCV (smile.validation.LOOCV): 2