use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.
the class TestFSTs method testIllegallyModifyRootArc.
/**
 * Builds an FST whose outputs are the terms themselves, looks up the arc for
 * {@code 'm'} from the root, then illegally truncates that arc's output in place
 * and repeats the lookup.
 *
 * <p>The test only runs when assertions are enabled. An {@link AssertionError}
 * raised by the second lookup is tolerated; the test does not fail if none is thrown.
 */
public void testIllegallyModifyRootArc() throws Exception {
  assumeTrue("test relies on assertions", assertsAreEnabled);

  // 100 single-character terms starting at 'a'; every one except "m" also gets
  // 20 randomly extended variants.
  Set<BytesRef> uniqueTerms = new HashSet<>();
  for (int offset = 0; offset < 100; offset++) {
    String head = Character.toString((char) ('a' + offset));
    uniqueTerms.add(new BytesRef(head));
    if ("m".equals(head)) {
      continue;
    }
    for (int n = 0; n < 20; n++) {
      // Make a big enough FST that the root cache will be created:
      uniqueTerms.add(new BytesRef(head + TestUtil.randomRealisticUnicodeString(random(), 10, 20)));
    }
  }

  // Terms are fed to the builder in sorted order.
  List<BytesRef> sortedTerms = new ArrayList<>(uniqueTerms);
  Collections.sort(sortedTerms);

  Builder<BytesRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, ByteSequenceOutputs.getSingleton());
  IntsRefBuilder scratch = new IntsRefBuilder();
  for (BytesRef term : sortedTerms) {
    // Each term is stored as its own output.
    builder.add(Util.toIntsRef(term, scratch), term);
  }
  FST<BytesRef> fst = builder.finish();

  final int label = 'm';
  Arc<BytesRef> arc = fst.getFirstArc(new FST.Arc<BytesRef>());
  FST.BytesReader reader = fst.getBytesReader();

  arc = fst.findTargetArc(label, arc, arc, reader);
  assertNotNull(arc);
  assertEquals(new BytesRef("m"), arc.output);

  // NOTE: illegal: mutates the output object handed back by the FST.
  arc.output.length = 0;

  fst.getFirstArc(arc);
  try {
    fst.findTargetArc(label, arc, arc, reader);
  } catch (AssertionError expected) {
    // expected
  }
}
use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.
the class TestFSTs method testShortestPathsRandom.
/**
 * Randomized check of {@code Util.shortestPaths}: builds an FST from random unique
 * words with weights in 1..100, then for every proper prefix of every word compares
 * the top-N completions returned by the FST against a brute-force scan of a
 * {@link TreeMap} holding the same data.
 */
public void testShortestPathsRandom() throws Exception {
  final Random random = random();
  int numWords = atLeast(1000);

  final TreeMap<String, Long> slowCompletor = new TreeMap<>();
  final TreeSet<String> allPrefixes = new TreeSet<>();

  final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
  final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
  final IntsRefBuilder scratch = new IntsRefBuilder();

  for (int wordIdx = 0; wordIdx < numWords; wordIdx++) {
    // Keep drawing until we get a word we have not seen yet.
    String word;
    do {
      word = TestUtil.randomSimpleString(random);
    } while (slowCompletor.containsKey(word));

    // Record every non-empty proper prefix of the word.
    for (int end = 1; end < word.length(); end++) {
      allPrefixes.add(word.substring(0, end));
    }

    // weights 1..100
    slowCompletor.put(word, (long) TestUtil.nextInt(random, 1, 100));
  }

  // The TreeMap iterates keys in sorted order; entries are added to the builder in that order.
  for (Map.Entry<String, Long> entry : slowCompletor.entrySet()) {
    BytesRef termBytes = new BytesRef(entry.getKey());
    builder.add(Util.toIntsRef(termBytes, scratch), entry.getValue());
  }
  final FST<Long> fst = builder.finish();

  BytesReader reader = fst.getBytesReader();

  for (String prefix : allPrefixes) {
    // 1. run prefix against fst, then complete by value
    long prefixOutput = 0;
    FST.Arc<Long> arc = fst.getFirstArc(new FST.Arc<Long>());
    for (int pos = 0; pos < prefix.length(); pos++) {
      int label = (int) prefix.charAt(pos);
      FST.Arc<Long> found = fst.findTargetArc(label, arc, arc, reader);
      if (found == null) {
        fail();
      }
      prefixOutput += arc.output;
    }

    final int topN = TestUtil.nextInt(random, 1, 10);
    Util.TopResults<Long> r =
        Util.shortestPaths(fst, arc, fst.outputs.getNoOutput(), minLongComparator, topN, true);
    assertTrue(r.isComplete);

    // 2. go thru whole treemap (slowCompletor) and check it's actually the best suggestion
    final List<Result<Long>> matches = new ArrayList<>();

    // TODO: could be faster... but it's slowCompletor for a reason
    for (Map.Entry<String, Long> entry : slowCompletor.entrySet()) {
      String candidate = entry.getKey();
      if (!candidate.startsWith(prefix)) {
        continue;
      }
      // Expected input is the remainder after the prefix; expected output is the
      // weight minus what the prefix arcs already contributed.
      String remainder = candidate.substring(prefix.length());
      long remainingOutput = entry.getValue() - prefixOutput;
      matches.add(new Result<>(Util.toIntsRef(new BytesRef(remainder), new IntsRefBuilder()), remainingOutput));
    }

    assertTrue(!matches.isEmpty());
    Collections.sort(matches, new TieBreakByInputComparator<>(minLongComparator));

    // Keep only the best topN brute-force matches.
    int matchCount = matches.size();
    if (matchCount > topN) {
      matches.subList(topN, matchCount).clear();
    }

    assertEquals(matches.size(), r.topN.size());
    for (int hit = 0; hit < r.topN.size(); hit++) {
      Result<Long> expected = matches.get(hit);
      assertEquals(expected.input, r.topN.get(hit).input);
      assertEquals(expected.output, r.topN.get(hit).output);
    }
  }
}
use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.
the class TestFSTs method testShortestPaths.
/**
 * Builds a three-term FST ("aab"=22, "aac"=7, "ax"=17) and verifies that
 * {@code Util.shortestPaths} with {@code minLongComparator} and topN=3 returns a
 * complete result listing all three terms in ascending order of output.
 */
public void testShortestPaths() throws Exception {
  final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
  final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
  final IntsRefBuilder scratch = new IntsRefBuilder();

  // Parallel arrays: terms in the order they are added, and their outputs.
  final String[] terms = {"aab", "aac", "ax"};
  final long[] costs = {22L, 7L, 17L};
  for (int i = 0; i < terms.length; i++) {
    builder.add(Util.toIntsRef(new BytesRef(terms[i]), scratch), costs[i]);
  }
  final FST<Long> fst = builder.finish();

  FST.Arc<Long> root = fst.getFirstArc(new FST.Arc<Long>());
  Util.TopResults<Long> res =
      Util.shortestPaths(fst, root, outputs.getNoOutput(), minLongComparator, 3, true);
  assertTrue(res.isComplete);
  assertEquals(3, res.topN.size());

  // Expected ranking, as indexes into terms/costs: "aac" (7), "ax" (17), "aab" (22).
  final int[] expectedOrder = {1, 2, 0};
  for (int rank = 0; rank < expectedOrder.length; rank++) {
    int termIdx = expectedOrder[rank];
    assertEquals(Util.toIntsRef(new BytesRef(terms[termIdx]), scratch), res.topN.get(rank).input);
    assertEquals(costs[termIdx], res.topN.get(rank).output.longValue());
  }
}
use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.
the class FiniteStringsIteratorTest method testFiniteStringsEatsStack.
/**
 * Enumerates the finite strings of an automaton accepting two random strings of
 * 50000 chars each, and checks that exactly those two strings come back.
 *
 * <p>NOTE(review): judging by the name, this presumably guards against excessive
 * stack use on very long strings; confirm against the iterator implementation.
 */
public void testFiniteStringsEatsStack() {
  // One buffer is refilled for each string; the String constructor copies its contents.
  char[] buffer = new char[50000];
  TestUtil.randomFixedLengthUnicodeString(random(), buffer, 0, buffer.length);
  String first = new String(buffer);
  TestUtil.randomFixedLengthUnicodeString(random(), buffer, 0, buffer.length);
  String second = new String(buffer);

  Automaton either = Operations.union(Automata.makeString(first), Automata.makeString(second));
  List<IntsRef> actual = getFiniteStrings(new FiniteStringsIterator(either));
  assertEquals(2, actual.size());

  // Each source string, converted to UTF-32 code points, must be among the results.
  IntsRefBuilder scratch = new IntsRefBuilder();
  for (String expected : new String[] {first, second}) {
    Util.toUTF32(expected.toCharArray(), 0, expected.length(), scratch);
    assertTrue(actual.contains(scratch.get()));
  }
}
use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.
the class FiniteStringsIteratorTest method testShortAccept.
/**
 * Checks that iterating the minimized union of "x" and "xy" yields exactly those
 * two strings, i.e. that an accepted string which is a prefix of another accepted
 * string is still reported.
 */
public void testShortAccept() {
  Automaton union = Operations.union(Automata.makeString("x"), Automata.makeString("xy"));
  Automaton minimal = MinimizationOperations.minimize(union, DEFAULT_MAX_DETERMINIZED_STATES);

  List<IntsRef> actual = getFiniteStrings(new FiniteStringsIterator(minimal));
  assertEquals(2, actual.size());

  // Each expected string is converted with its own fresh builder.
  for (String expected : new String[] {"x", "xy"}) {
    IntsRefBuilder converted = new IntsRefBuilder();
    Util.toIntsRef(new BytesRef(expected), converted);
    assertTrue(actual.contains(converted.get()));
  }
}
Aggregations