Use of smile.math.distance.EditDistance in project smile by haifengl.
The class LinearSearchSpeedTest, method testString.
/**
 * Speed test of the range method of class LinearSearch on string data.
 *
 * Reads "neighbor/index.noun", keeping the first whitespace-delimited
 * token of every line that does not start with a space (underscores are
 * replaced by spaces), then times 100 range queries of radius 1 under
 * EditDistance. Load and search times are printed to standard output.
 */
@Test
public void testString() {
    System.out.println("string");

    List<String> words = new ArrayList<>();
    long start = System.currentTimeMillis();
    // try-with-resources so the reader is closed even when reading fails.
    try (BufferedReader input = smile.data.parser.IOUtils.getTestDataReader("neighbor/index.noun")) {
        String line = input.readLine();
        while (line != null) {
            // Lines starting with a space are skipped.
            if (!line.startsWith(" ")) {
                String[] w = line.split("\\s");
                words.add(w[0].replace('_', ' '));
            }
            line = input.readLine();
        }
    } catch (Exception e) {
        System.err.println(e);
    }

    double time = (System.currentTimeMillis() - start) / 1000.0;
    System.out.format("Loading string data: %.2fs%n", time);

    String[] data = words.toArray(new String[0]);
    LinearSearch<String> naive = new LinearSearch<>(data, new EditDistance(50, true));

    start = System.currentTimeMillis();
    List<Neighbor<String, String>> neighbors = new ArrayList<>();
    // Query with the words at positions 1000..1099; the result list is reused.
    for (int i = 1000; i < 1100; i++) {
        naive.range(words.get(i), 1, neighbors);
        neighbors.clear();
    }

    time = (System.currentTimeMillis() - start) / 1000.0;
    System.out.format("Linear string search: %.2fs%n", time);
}
Use of smile.math.distance.EditDistance in project smile by haifengl.
The class ApproximateStringSearchDemo, method run.
/**
 * Runs the approximate string search benchmark.
 *
 * On the first call (when {@code data} is still null) the word list is
 * read from "index.noun" and the linear-search, BK-tree and cover-tree
 * indexes are built, with their build times shown in a bar plot. Every
 * call then times range queries with radius {@code knn} against each
 * index and adds a bar plot of the search times to the canvas.
 *
 * The start button and the knn field are disabled for the duration of
 * the run and re-enabled when it finishes, including when it fails.
 */
@Override
public void run() {
    startButton.setEnabled(false);
    knnField.setEnabled(false);

    try {
        if (data == null) {
            System.out.print("Loading dataset...");
            List<String> words = new ArrayList<>();
            // try-with-resources closes the reader and the underlying file stream.
            try (BufferedReader input = new BufferedReader(new InputStreamReader(
                    new FileInputStream(smile.data.parser.IOUtils.getTestDataFile("index.noun"))))) {
                String line = input.readLine();
                while (line != null) {
                    // Lines starting with a space are skipped.
                    if (!line.startsWith(" ")) {
                        String[] w = line.split("\\s");
                        words.add(w[0].replace('_', ' '));
                    }
                    line = input.readLine();
                }
            } catch (Exception e) {
                System.err.println(e);
            }

            System.out.println(words.size() + " words");
            if (words.isEmpty()) {
                // Nothing to index; leave data null so a later run retries the load.
                System.err.println("No words loaded; skipping benchmark.");
                return;
            }

            // A zero-length seed array avoids a trailing null element when the list is empty.
            data = words.toArray(new String[0]);

            System.out.println("Building searching data structure...");
            // Divide by 1000.0 so sub-second timings are not truncated to 0.
            long time = System.currentTimeMillis();
            naive = new LinearSearch<>(data, new EditDistance(50, true));
            double naiveBuild = (System.currentTimeMillis() - time) / 1000.0;

            time = System.currentTimeMillis();
            bktree = new BKTree<>(new EditDistance(50, true));
            bktree.add(data);
            double bktreeBuild = (System.currentTimeMillis() - time) / 1000.0;

            time = System.currentTimeMillis();
            cover = new CoverTree<>(data, new EditDistance(50, true));
            double coverBuild = (System.currentTimeMillis() - time) / 1000.0;

            double[] buildTime = { naiveBuild, bktreeBuild, coverBuild };
            PlotCanvas build = BarPlot.plot(buildTime, label);
            build.setTitle("Build Time");
            canvas.add(build);
            validate();
        }

        int[] perm = Math.permutate(data.length);
        // Cap the number of queries so perm is never indexed past its end.
        int searches = data.length < 1000 ? data.length : 1000;

        System.out.println("Perform 1000 searches...");
        List<Neighbor<String, String>> neighbors = new ArrayList<>();

        long time = System.currentTimeMillis();
        for (int i = 0; i < searches; i++) {
            naive.range(data[perm[i]], knn, neighbors);
            neighbors.clear();
        }
        double naiveSearch = (System.currentTimeMillis() - time) / 1000.0;

        time = System.currentTimeMillis();
        for (int i = 0; i < searches; i++) {
            bktree.range(data[perm[i]], knn, neighbors);
            neighbors.clear();
        }
        double bktreeSearch = (System.currentTimeMillis() - time) / 1000.0;

        time = System.currentTimeMillis();
        for (int i = 0; i < searches; i++) {
            cover.range(data[perm[i]], knn, neighbors);
            neighbors.clear();
        }
        double coverSearch = (System.currentTimeMillis() - time) / 1000.0;

        double[] searchTime = { naiveSearch, bktreeSearch, coverSearch };
        PlotCanvas search = BarPlot.plot(searchTime, label);
        search.setTitle("Search Time of k = " + knn);
        canvas.add(search);
        if (canvas.getComponentCount() > 3) {
            canvas.setLayout(new GridLayout(2, 2));
        }
        validate();
    } finally {
        // Always hand control back to the user, even if the benchmark failed.
        startButton.setEnabled(true);
        knnField.setEnabled(true);
    }
}
Aggregations