Use of org.apache.lucene.util.fst.PairOutputs.Pair in the Apache lucene-solr project.
Class TestFSTs, method testShortestPathsWFSTRandom.
/**
 * Randomized counterpart of testShortestPathsRandom that stores a {@code Pair} per input, so
 * every entry carries both a weight and an output.
 *
 * <p>Unique random strings are mapped to random (weight, output) pairs and compiled into an FST.
 * For every non-empty proper prefix of every string, the top-N completions returned by
 * {@code Util.shortestPaths} are compared, hit by hit, against a brute-force scan of a sorted map
 * holding the same data.
 *
 * @throws Exception if building or traversing the FST fails
 */
public void testShortestPathsWFSTRandom() throws Exception {
  final int wordCount = atLeast(1000);
  final TreeMap<String, TwoLongs> expectedByWord = new TreeMap<>();
  final TreeSet<String> prefixes = new TreeSet<>();
  final PairOutputs<Long, Long> pairOutputs =
      new PairOutputs<>(
          PositiveIntOutputs.getSingleton(), // weight
          PositiveIntOutputs.getSingleton()); // output
  final Builder<Pair<Long, Long>> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1, pairOutputs);
  final IntsRefBuilder inputScratch = new IntsRefBuilder();
  final Random rnd = random();

  // Phase 1: generate the reference data and collect every prefix to probe later.
  for (int wordIdx = 0; wordIdx < wordCount; wordIdx++) {
    // Redraw until the string has not been seen before.
    String word;
    do {
      word = TestUtil.randomSimpleString(rnd);
    } while (expectedByWord.containsKey(word));

    // Only prefixes strictly shorter than the word (and non-empty) are recorded.
    final int wordLength = word.length();
    for (int end = 1; end < wordLength; end++) {
      prefixes.add(word.substring(0, end));
    }

    final int weight = TestUtil.nextInt(rnd, 1, 100); // weights 1..100
    final int output = TestUtil.nextInt(rnd, 0, 500); // outputs 0..500
    expectedByWord.put(word, new TwoLongs(weight, output));
  }

  // Phase 2: feed the entries to the builder in the sorted map's iteration order.
  for (Map.Entry<String, TwoLongs> entry : expectedByWord.entrySet()) {
    final TwoLongs value = entry.getValue();
    final long weight = value.a;
    final long output = value.b;
    final BytesRef wordBytes = new BytesRef(entry.getKey());
    fstBuilder.add(Util.toIntsRef(wordBytes, inputScratch), pairOutputs.newPair(weight, output));
  }
  final FST<Pair<Long, Long>> fst = fstBuilder.finish();
  // Debugging hint: the FST can be dumped with Util.toDot(fst, writer, false, false).
  final BytesReader fstReader = fst.getBytesReader();

  // Phase 3: check every prefix.
  for (String prefix : prefixes) {
    // Step 1: walk the prefix through the FST, summing the outputs seen on the way,
    // then ask for the best completions from the arc that was reached.
    Pair<Long, Long> consumed = pairOutputs.getNoOutput();
    final FST.Arc<Pair<Long, Long>> arc = fst.getFirstArc(new FST.Arc<Pair<Long, Long>>());
    for (int pos = 0; pos < prefix.length(); pos++) {
      final int label = prefix.charAt(pos);
      // findTargetArc is given 'arc' as both the source and the arc to fill in.
      final FST.Arc<Pair<Long, Long>> followed = fst.findTargetArc(label, arc, arc, fstReader);
      if (followed == null) {
        fail();
      }
      consumed = pairOutputs.add(consumed, arc.output);
    }

    final int topN = TestUtil.nextInt(rnd, 1, 10);
    final Util.TopResults<Pair<Long, Long>> actual =
        Util.shortestPaths(
            fst, arc, fst.outputs.getNoOutput(), minPairWeightComparator, topN, true);
    assertTrue(actual.isComplete);

    // Step 2: brute-force the same answer from the sorted map. Deliberately a full scan.
    final List<Result<Pair<Long, Long>>> expected = new ArrayList<>();
    for (Map.Entry<String, TwoLongs> entry : expectedByWord.entrySet()) {
      final String word = entry.getKey();
      if (!word.startsWith(prefix)) {
        continue;
      }
      final TwoLongs value = entry.getValue();
      final String suffix = word.substring(prefix.length());
      // A fresh builder per match: presumably toIntsRef returns a view over the builder it is
      // handed, so sharing one would alias earlier results (TODO confirm).
      final IntsRef suffixInput = Util.toIntsRef(new BytesRef(suffix), new IntsRefBuilder());
      // Subtract what the prefix walk already accumulated from the entry's totals.
      final Pair<Long, Long> remaining =
          pairOutputs.newPair(value.a - consumed.output1, value.b - consumed.output2);
      expected.add(new Result<>(suffixInput, remaining));
    }
    assertTrue(expected.size() > 0);

    Collections.sort(expected, new TieBreakByInputComparator<>(minPairWeightComparator));
    final int surplus = expected.size() - topN;
    if (surplus > 0) {
      // Keep only the first topN reference matches.
      expected.subList(topN, expected.size()).clear();
    }

    assertEquals(expected.size(), actual.topN.size());
    final int hitCount = actual.topN.size();
    for (int rank = 0; rank < hitCount; rank++) {
      final Result<Pair<Long, Long>> want = expected.get(rank);
      final Result<Pair<Long, Long>> got = actual.topN.get(rank);
      assertEquals(want.input, got.input);
      assertEquals(want.output, got.output);
    }
  }
}
Use of org.apache.lucene.util.fst.PairOutputs.Pair in the Apache lucene-solr project.
Class TestFSTs, method testShortestPathsWFST.
/**
 * Fixed-data counterpart of testShortestPaths that stores a {@code Pair} per input, so every
 * entry carries both a weight and an output.
 *
 * <p>Three inputs are added to an FST and {@code Util.shortestPaths} is asked for the top three
 * paths from the root arc; the hits are expected ordered by ascending weight (7, 17, 22), each
 * with its matching output.
 *
 * @throws Exception if building or traversing the FST fails
 */
public void testShortestPathsWFST() throws Exception {
  final PairOutputs<Long, Long> pairOutputs =
      new PairOutputs<>(
          PositiveIntOutputs.getSingleton(), // weight
          PositiveIntOutputs.getSingleton()); // output
  final Builder<Pair<Long, Long>> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1, pairOutputs);
  final IntsRefBuilder scratch = new IntsRefBuilder();

  // Entries are added in ascending key order; each value is (weight, output).
  fstBuilder.add(Util.toIntsRef(new BytesRef("aab"), scratch), pairOutputs.newPair(22L, 57L));
  fstBuilder.add(Util.toIntsRef(new BytesRef("aac"), scratch), pairOutputs.newPair(7L, 36L));
  fstBuilder.add(Util.toIntsRef(new BytesRef("ax"), scratch), pairOutputs.newPair(17L, 85L));
  final FST<Pair<Long, Long>> fst = fstBuilder.finish();
  // Debugging hint: the FST can be dumped with Util.toDot(fst, writer, false, false).

  final FST.Arc<Pair<Long, Long>> rootArc = fst.getFirstArc(new FST.Arc<Pair<Long, Long>>());
  final Util.TopResults<Pair<Long, Long>> res =
      Util.shortestPaths(
          fst, rootArc, pairOutputs.getNoOutput(), minPairWeightComparator, 3, true);
  assertTrue(res.isComplete);

  // Expected hits, one column per rank.
  final String[] expectedInputs = {"aac", "ax", "aab"};
  final long[] expectedWeights = {7L, 17L, 22L};
  final long[] expectedOutputs = {36L, 85L, 57L};

  assertEquals(expectedInputs.length, res.topN.size());
  for (int rank = 0; rank < expectedInputs.length; rank++) {
    assertEquals(
        Util.toIntsRef(new BytesRef(expectedInputs[rank]), scratch), res.topN.get(rank).input);
    // weight
    assertEquals(expectedWeights[rank], res.topN.get(rank).output.output1.longValue());
    // output
    assertEquals(expectedOutputs[rank], res.topN.get(rank).output.output2.longValue());
  }
}
Aggregations