Search in sources :

Example 51 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class TestFSTs method testInternalFinalState.

/**
 * Adds "stat" and "station" to an FST, renders it with {@code Util.toDot}, and checks that
 * the rendering contains a bold-styled entry for label "t" and one for label "n"
 * (the accept states the test expects at those labels).
 */
public void testInternalFinalState() throws Exception {
    final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
    final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
    // Inputs are added in sorted order, each with its own scratch builder and no output.
    for (String term : new String[] { "stat", "station" }) {
        builder.add(Util.toIntsRef(new BytesRef(term), new IntsRefBuilder()), outputs.getNoOutput());
    }
    final FST<Long> fst = builder.finish();

    // To inspect the graph by hand, swap in a file-backed writer, e.g.
    // new OutputStreamWriter(new FileOutputStream("/x/tmp/out.dot")).
    final StringWriter dotWriter = new StringWriter();
    Util.toDot(fst, dotWriter, false, false);
    dotWriter.close();
    final String dot = dotWriter.toString();

    // accept state expected at label t
    assertTrue(dot.contains("[label=\"t\" style=\"bold\""));
    // accept state expected at label n
    assertTrue(dot.contains("[label=\"n\" style=\"bold\""));
}
Also used : StringWriter(java.io.StringWriter) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) BytesRef(org.apache.lucene.util.BytesRef)

Example 52 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class TestFSTs method testShortestPathsWFST.

/**
 * Like testShortestPaths, but uses {@code PairOutputs} so that every entry carries both a
 * weight (first element of the pair) and an output (second element of the pair).
 */
public void testShortestPathsWFST() throws Exception {
    final PositiveIntOutputs weightOutputs = PositiveIntOutputs.getSingleton();
    final PositiveIntOutputs valueOutputs = PositiveIntOutputs.getSingleton();
    PairOutputs<Long, Long> outputs = new PairOutputs<>(weightOutputs, valueOutputs);

    final Builder<Pair<Long, Long>> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
    final IntsRefBuilder scratch = new IntsRefBuilder();
    // Each entry is added as (input, pair(weight, output)).
    builder.add(Util.toIntsRef(new BytesRef("aab"), scratch), outputs.newPair(22L, 57L));
    builder.add(Util.toIntsRef(new BytesRef("aac"), scratch), outputs.newPair(7L, 36L));
    builder.add(Util.toIntsRef(new BytesRef("ax"), scratch), outputs.newPair(17L, 85L));
    final FST<Pair<Long, Long>> fst = builder.finish();

    // For debugging, the FST can be dumped with Util.toDot(fst, writer, false, false).
    final FST.Arc<Pair<Long, Long>> firstArc = fst.getFirstArc(new FST.Arc<Pair<Long, Long>>());
    Util.TopResults<Pair<Long, Long>> res = Util.shortestPaths(fst, firstArc, outputs.getNoOutput(), minPairWeightComparator, 3, true);
    assertTrue(res.isComplete);
    assertEquals(3, res.topN.size());

    // Expected results by rank; the weights are ascending (7, 17, 22).
    final String[] expectedInputs = { "aac", "ax", "aab" };
    final long[] expectedWeights = { 7L, 17L, 22L };
    final long[] expectedOutputs = { 36L, 85L, 57L };
    for (int rank = 0; rank < expectedInputs.length; rank++) {
        assertEquals(Util.toIntsRef(new BytesRef(expectedInputs[rank]), scratch), res.topN.get(rank).input);
        // weight
        assertEquals(expectedWeights[rank], res.topN.get(rank).output.output1.longValue());
        // output
        assertEquals(expectedOutputs[rank], res.topN.get(rank).output.output2.longValue());
    }
}
Also used : Arc(org.apache.lucene.util.fst.FST.Arc) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) TestUtil(org.apache.lucene.util.TestUtil) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) BytesRef(org.apache.lucene.util.BytesRef) Pair(org.apache.lucene.util.fst.PairOutputs.Pair)

Example 53 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class TestUTF32ToUTF8 method testSingleton.

/**
 * For at least 100 random strings, converts the single-string automaton with
 * {@code UTF32ToUTF8} and checks that its finite strings are exactly the one int sequence
 * produced from the string's {@code BytesRef}.
 */
public void testSingleton() throws Exception {
    final int rounds = atLeast(100);
    for (int round = 0; round < rounds; round++) {
        final String text = TestUtil.randomRealisticUnicodeString(random());

        // Expected: a set holding only the int sequence built from the string's bytes.
        final IntsRefBuilder expectedInts = new IntsRefBuilder();
        Util.toIntsRef(new BytesRef(text), expectedInts);
        final Set<IntsRef> expected = new HashSet<>();
        expected.add(expectedInts.get());

        // Actual: the finite strings of the converted automaton.
        final Automaton unicode = Automata.makeString(text);
        final Automaton utf8 = new UTF32ToUTF8().convert(unicode);
        assertEquals(expected, TestOperations.getFiniteStrings(utf8));
    }
}
Also used : IntsRef(org.apache.lucene.util.IntsRef) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) BytesRef(org.apache.lucene.util.BytesRef) HashSet(java.util.HashSet)

Example 54 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class TestFSTs method testDuplicateFSAString.

/**
 * Adds the same input ten times to a no-output FST and checks that enumeration yields a
 * single path, that the input is found, and that "foobaz" is not.
 */
public void testDuplicateFSAString() throws Exception {
    final String term = "foobar";
    final Outputs<Object> outputs = NoOutputs.getSingleton();
    final Builder<Object> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
    final IntsRefBuilder scratch = new IntsRefBuilder();

    // Add the identical input ten times.
    int remaining = 10;
    while (remaining-- > 0) {
        fstBuilder.add(Util.toIntsRef(new BytesRef(term), scratch), outputs.getNoOutput());
    }
    final FST<Object> fst = fstBuilder.finish();

    // count the input paths
    int pathCount = 0;
    for (BytesRefFSTEnum<Object> paths = new BytesRefFSTEnum<>(fst); paths.next() != null; ) {
        pathCount++;
    }
    assertEquals(1, pathCount);

    assertNotNull(Util.get(fst, new BytesRef(term)));
    assertNull(Util.get(fst, new BytesRef("foobaz")));
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) FSTTester.simpleRandomString(org.apache.lucene.util.fst.FSTTester.simpleRandomString) FSTTester.getRandomString(org.apache.lucene.util.fst.FSTTester.getRandomString) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) BytesRef(org.apache.lucene.util.BytesRef)

Example 55 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class TestFSTs method testRejectNoLimits.

/**
 * Runs a {@code Util.TopNSearcher} (topN = 2) whose {@code acceptResult} only accepts
 * results with output 7 and counts every rejection.
 *
 * <p>With a max queue size of 6 the search is expected to be complete and return the single
 * accepted path "aac"; with a max queue size of 5 it is expected to be reported as
 * incomplete. In both runs exactly four results are expected to be rejected.
 *
 * @throws IOException if building or searching the FST fails
 */
public void testRejectNoLimits() throws IOException {
    final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
    final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
    final IntsRefBuilder scratch = new IntsRefBuilder();
    builder.add(Util.toIntsRef(new BytesRef("aab"), scratch), 22L);
    builder.add(Util.toIntsRef(new BytesRef("aac"), scratch), 7L);
    builder.add(Util.toIntsRef(new BytesRef("adcd"), scratch), 17L);
    builder.add(Util.toIntsRef(new BytesRef("adcde"), scratch), 17L);
    builder.add(Util.toIntsRef(new BytesRef("ax"), scratch), 17L);
    final FST<Long> fst = builder.finish();
    // Shared by both searchers below; reset between the two runs.
    final AtomicInteger rejectCount = new AtomicInteger();
    Util.TopNSearcher<Long> searcher = new Util.TopNSearcher<Long>(fst, 2, 6, minLongComparator) {

        @Override
        protected boolean acceptResult(IntsRef input, Long output) {
            // Only the path with output 7 is accepted; every other candidate is counted.
            boolean accept = output.intValue() == 7;
            if (!accept) {
                rejectCount.incrementAndGet();
            }
            return accept;
        }
    };
    searcher.addStartPaths(fst.getFirstArc(new FST.Arc<Long>()), outputs.getNoOutput(), true, new IntsRefBuilder());
    Util.TopResults<Long> res = searcher.search();
    // Expected value goes first so a failure message reports expected/actual correctly.
    assertEquals(4, rejectCount.get());
    // rejected(4) + topN(2) <= maxQueueSize(6)
    assertTrue(res.isComplete);
    assertEquals(1, res.topN.size());
    assertEquals(Util.toIntsRef(new BytesRef("aac"), scratch), res.topN.get(0).input);
    assertEquals(7L, res.topN.get(0).output.longValue());
    rejectCount.set(0);
    searcher = new Util.TopNSearcher<Long>(fst, 2, 5, minLongComparator) {

        @Override
        protected boolean acceptResult(IntsRef input, Long output) {
            // Same acceptance rule as the first searcher; only the queue size differs.
            boolean accept = output.intValue() == 7;
            if (!accept) {
                rejectCount.incrementAndGet();
            }
            return accept;
        }
    };
    searcher.addStartPaths(fst.getFirstArc(new FST.Arc<Long>()), outputs.getNoOutput(), true, new IntsRefBuilder());
    res = searcher.search();
    // Expected value goes first so a failure message reports expected/actual correctly.
    assertEquals(4, rejectCount.get());
    // rejected(4) + topN(2) > maxQueueSize(5)
    assertFalse(res.isComplete);
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) TestUtil(org.apache.lucene.util.TestUtil) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) Arc(org.apache.lucene.util.fst.FST.Arc) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) IntsRef(org.apache.lucene.util.IntsRef) FSTTester.toIntsRef(org.apache.lucene.util.fst.FSTTester.toIntsRef) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

IntsRefBuilder (org.apache.lucene.util.IntsRefBuilder)55 BytesRef (org.apache.lucene.util.BytesRef)32 BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder)25 IntsRef (org.apache.lucene.util.IntsRef)19 ArrayList (java.util.ArrayList)10 HashSet (java.util.HashSet)10 Builder (org.apache.lucene.util.fst.Builder)10 Arc (org.apache.lucene.util.fst.FST.Arc)9 BytesReader (org.apache.lucene.util.fst.FST.BytesReader)8 Map (java.util.Map)7 HashMap (java.util.HashMap)5 ByteArrayDataInput (org.apache.lucene.store.ByteArrayDataInput)5 CharsRef (org.apache.lucene.util.CharsRef)5 TestUtil (org.apache.lucene.util.TestUtil)5 FSTTester.getRandomString (org.apache.lucene.util.fst.FSTTester.getRandomString)5 FSTTester.simpleRandomString (org.apache.lucene.util.fst.FSTTester.simpleRandomString)5 TreeMap (java.util.TreeMap)4 CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder)4 Pair (org.apache.lucene.util.fst.PairOutputs.Pair)4 IOException (java.io.IOException)3