Search in sources :

Example 1 with EditDistance

use of smile.math.distance.EditDistance in project smile by haifengl.

In class LinearSearchSpeedTest, method testString:

/**
 * Speed test of the {@code range} method of class LinearSearch on string data.
 * <p>
 * Reads words from the "neighbor/index.noun" test resource, builds a
 * {@code LinearSearch} over them with an {@code EditDistance}, then times
 * 100 range queries of radius 1 (using words 1000..1099 as queries) and
 * prints the load and search times to standard output.
 */
@Test
public void testString() {
    System.out.println("string");
    List<String> words = new ArrayList<>();
    long start = System.currentTimeMillis();
    // try-with-resources: the reader is closed even when reading fails midway.
    try (BufferedReader input = smile.data.parser.IOUtils.getTestDataReader("neighbor/index.noun")) {
        String line = input.readLine();
        while (line != null) {
            // Lines beginning with a space are skipped
            // (presumably header/license text of the index file -- TODO confirm).
            if (!line.startsWith(" ")) {
                // The first whitespace-delimited token is the word; '_' stands for a space.
                String[] w = line.split("\\s");
                words.add(w[0].replace('_', ' '));
            }
            line = input.readLine();
        }
    } catch (Exception e) {
        System.err.println(e);
    }
    double time = (System.currentTimeMillis() - start) / 1000.0;
    System.out.format("Loading string data: %.2fs%n", time);
    String[] data = words.toArray(new String[words.size()]);
    LinearSearch<String> naive = new LinearSearch<>(data, new EditDistance(50, true));
    start = System.currentTimeMillis();
    // The result list is reused across queries and cleared after each one.
    List<Neighbor<String, String>> neighbors = new ArrayList<>();
    for (int i = 1000; i < 1100; i++) {
        naive.range(words.get(i), 1, neighbors);
        neighbors.clear();
    }
    time = (System.currentTimeMillis() - start) / 1000.0;
    System.out.format("Linear string search: %.2fs%n", time);
}
Also used : ArrayList(java.util.ArrayList) BufferedReader(java.io.BufferedReader) EditDistance(smile.math.distance.EditDistance) Test(org.junit.Test)

Example 2 with EditDistance

use of smile.math.distance.EditDistance in project smile by haifengl.

In class ApproximateStringSearchDemo, method run:

/**
 * Runs one benchmark round of the approximate string search demo.
 * <p>
 * The start button and the knn field are disabled while the round runs and
 * re-enabled at the end. On the first call ({@code data == null}) the word
 * list is loaded from the "index.noun" test data file and three search
 * structures (LinearSearch, BKTree, CoverTree) are built over it with an
 * {@code EditDistance}; their build times are shown as a bar plot. Every
 * call then times 1000 range queries of radius {@code knn} against each
 * structure and adds a bar plot of the search times to the canvas.
 */
@Override
public void run() {
    startButton.setEnabled(false);
    knnField.setEnabled(false);
    if (data == null) {
        System.out.print("Loading dataset...");
        List<String> words = new ArrayList<>();
        // try-with-resources: both the file stream and the reader are closed
        // even when reading fails midway.
        try (FileInputStream stream = new FileInputStream(smile.data.parser.IOUtils.getTestDataFile("index.noun"));
             BufferedReader input = new BufferedReader(new InputStreamReader(stream))) {
            String line = input.readLine();
            while (line != null) {
                // Lines beginning with a space are skipped
                // (presumably header/license text of the index file -- TODO confirm).
                if (!line.startsWith(" ")) {
                    // The first whitespace-delimited token is the word; '_' stands for a space.
                    String[] w = line.split("\\s");
                    words.add(w[0].replace('_', ' '));
                }
                line = input.readLine();
            }
        } catch (Exception e) {
            System.err.println(e);
        }
        System.out.println(words.size() + " words");
        if (words.isEmpty()) {
            // Nothing was loaded (the error, if any, was reported above). Leave
            // data unset so a later run can retry, and give the controls back
            // instead of failing further down with them still disabled.
            startButton.setEnabled(true);
            knnField.setEnabled(true);
            return;
        }
        // A zero-length seed array never leaves a stray null element in the result.
        data = words.toArray(new String[0]);
        System.out.println("Building searching data structure...");
        // Elapsed times are kept as fractional seconds so that sub-second
        // measurements are not flattened to 0 in the plots.
        long time = System.currentTimeMillis();
        naive = new LinearSearch<>(data, new EditDistance(50, true));
        double naiveBuild = (System.currentTimeMillis() - time) / 1000.0;
        time = System.currentTimeMillis();
        bktree = new BKTree<>(new EditDistance(50, true));
        bktree.add(data);
        double bktreeBuild = (System.currentTimeMillis() - time) / 1000.0;
        time = System.currentTimeMillis();
        cover = new CoverTree<>(data, new EditDistance(50, true));
        double coverBuild = (System.currentTimeMillis() - time) / 1000.0;
        double[] buildTime = { naiveBuild, bktreeBuild, coverBuild };
        PlotCanvas build = BarPlot.plot(buildTime, label);
        build.setTitle("Build Time");
        canvas.add(build);
        validate();
    }
    // The same permutation supplies the query words for all three structures.
    int[] perm = Math.permutate(data.length);
    System.out.println("Perform 1000 searches...");
    long time = System.currentTimeMillis();
    // The result list is reused across queries and cleared after each one.
    List<Neighbor<String, String>> neighbors = new ArrayList<>();
    for (int i = 0; i < 1000; i++) {
        naive.range(data[perm[i]], knn, neighbors);
        neighbors.clear();
    }
    double naiveSearch = (System.currentTimeMillis() - time) / 1000.0;
    time = System.currentTimeMillis();
    for (int i = 0; i < 1000; i++) {
        bktree.range(data[perm[i]], knn, neighbors);
        neighbors.clear();
    }
    double bktreeSearch = (System.currentTimeMillis() - time) / 1000.0;
    time = System.currentTimeMillis();
    for (int i = 0; i < 1000; i++) {
        cover.range(data[perm[i]], knn, neighbors);
        neighbors.clear();
    }
    double coverSearch = (System.currentTimeMillis() - time) / 1000.0;
    double[] searchTime = { naiveSearch, bktreeSearch, coverSearch };
    PlotCanvas search = BarPlot.plot(searchTime, label);
    search.setTitle("Search Time of k = " + knn);
    canvas.add(search);
    // Once more than three plots are present, switch the canvas to a 2x2 grid layout.
    if (canvas.getComponentCount() > 3) {
        canvas.setLayout(new GridLayout(2, 2));
    }
    validate();
    startButton.setEnabled(true);
    knnField.setEnabled(true);
}
Also used : InputStreamReader(java.io.InputStreamReader) ArrayList(java.util.ArrayList) Neighbor(smile.neighbor.Neighbor) FileInputStream(java.io.FileInputStream) GridLayout(java.awt.GridLayout) BufferedReader(java.io.BufferedReader) EditDistance(smile.math.distance.EditDistance) PlotCanvas(smile.plot.PlotCanvas)

Aggregations

BufferedReader (java.io.BufferedReader): 2, ArrayList (java.util.ArrayList): 2, EditDistance (smile.math.distance.EditDistance): 2, GridLayout (java.awt.GridLayout): 1, FileInputStream (java.io.FileInputStream): 1, InputStreamReader (java.io.InputStreamReader): 1, Test (org.junit.Test): 1, Neighbor (smile.neighbor.Neighbor): 1, PlotCanvas (smile.plot.PlotCanvas): 1