Search in sources :

Example 61 with IntsRef

use of org.apache.lucene.util.IntsRef in project lucene-solr by apache.

the class TestAutomaton method testReverseRandom2.

/**
 * For a number of random automata, verifies that the determinized reversal accepts the
 * reversed form of strings accepted by the original automaton. When the original is
 * empty, the determinized reversal must be empty as well.
 */
public void testReverseRandom2() throws Exception {
    final int rounds = atLeast(100);
    for (int round = 0; round < rounds; round++) {
        Automaton original = AutomatonTestUtil.randomAutomaton(random());
        if (random().nextBoolean()) {
            original = Operations.removeDeadStates(original);
        }
        final Automaton reversed = Operations.reverse(original);
        final Automaton reversedDet = Operations.determinize(reversed, Integer.MAX_VALUE);
        if (Operations.isEmpty(original)) {
            assertTrue(Operations.isEmpty(reversedDet));
        } else {
            final RandomAcceptedStrings sampler = new RandomAcceptedStrings(original);
            for (int sample = 0; sample < 20; sample++) {
                // Draw a string accepted by the original automaton ...
                final int[] word = sampler.getRandomAcceptedString(random());
                // ... flip it in place, walking inwards from both ends ...
                for (int lo = 0, hi = word.length - 1; lo < hi; lo++, hi--) {
                    final int tmp = word[lo];
                    word[lo] = word[hi];
                    word[hi] = tmp;
                }
                // ... and require the reversed automaton to accept the flipped string.
                assertTrue(Operations.run(reversedDet, new IntsRef(word, 0, word.length)));
            }
        }
    }
}
Also used : RandomAcceptedStrings(org.apache.lucene.util.automaton.AutomatonTestUtil.RandomAcceptedStrings) IntsRef(org.apache.lucene.util.IntsRef)

Example 62 with IntsRef

use of org.apache.lucene.util.IntsRef in project lucene-solr by apache.

the class TestUTF32ToUTF8 method testSingleton.

/**
 * For a number of random strings, builds the single-string automaton, converts it with
 * {@link UTF32ToUTF8}, and checks that the finite strings of the converted automaton are
 * exactly the one int sequence derived from the string's {@link BytesRef}.
 */
public void testSingleton() throws Exception {
    final int rounds = atLeast(100);
    for (int round = 0; round < rounds; round++) {
        final String text = TestUtil.randomRealisticUnicodeString(random());
        final Automaton converted = new UTF32ToUTF8().convert(Automata.makeString(text));

        // Expected result: a set holding only the int sequence built from the string's bytes.
        final IntsRefBuilder scratch = new IntsRefBuilder();
        Util.toIntsRef(new BytesRef(text), scratch);
        final Set<IntsRef> expected = new HashSet<>();
        expected.add(scratch.get());

        assertEquals(expected, TestOperations.getFiniteStrings(converted));
    }
}
Also used : IntsRef(org.apache.lucene.util.IntsRef) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) BytesRef(org.apache.lucene.util.BytesRef) HashSet(java.util.HashSet)

Example 63 with IntsRef

use of org.apache.lucene.util.IntsRef in project lucene-solr by apache.

the class TestFSTs method testRejectNoLimits.

/**
 * Exercises {@code Util.TopNSearcher} with an {@code acceptResult} filter that rejects
 * every path whose output is not 7.
 *
 * <p>The FST holds five inputs, only one of which ("aac") has output 7, so four paths are
 * rejected in each search. The first search uses a queue size of 6 and is expected to be
 * complete; the second uses a queue size of 5 and is expected to be reported as incomplete.
 *
 * @throws IOException if building or searching the FST fails
 */
public void testRejectNoLimits() throws IOException {
    final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
    final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
    final IntsRefBuilder scratch = new IntsRefBuilder();
    builder.add(Util.toIntsRef(new BytesRef("aab"), scratch), 22L);
    builder.add(Util.toIntsRef(new BytesRef("aac"), scratch), 7L);
    builder.add(Util.toIntsRef(new BytesRef("adcd"), scratch), 17L);
    builder.add(Util.toIntsRef(new BytesRef("adcde"), scratch), 17L);
    builder.add(Util.toIntsRef(new BytesRef("ax"), scratch), 17L);
    final FST<Long> fst = builder.finish();
    // Counts how many candidate paths the acceptResult filter turned down.
    final AtomicInteger rejectCount = new AtomicInteger();
    Util.TopNSearcher<Long> searcher = new Util.TopNSearcher<Long>(fst, 2, 6, minLongComparator) {

        @Override
        protected boolean acceptResult(IntsRef input, Long output) {
            // Only the path whose output is 7 is accepted; everything else is counted as rejected.
            boolean accept = output.intValue() == 7;
            if (!accept) {
                rejectCount.incrementAndGet();
            }
            return accept;
        }
    };
    searcher.addStartPaths(fst.getFirstArc(new FST.Arc<Long>()), outputs.getNoOutput(), true, new IntsRefBuilder());
    Util.TopResults<Long> res = searcher.search();
    // Expected value first, so a failure message reports expected/actual the right way round.
    assertEquals(4, rejectCount.get());
    // rejected(4) + topN(2) <= maxQueueSize(6)
    assertTrue(res.isComplete);
    assertEquals(1, res.topN.size());
    assertEquals(Util.toIntsRef(new BytesRef("aac"), scratch), res.topN.get(0).input);
    assertEquals(7L, res.topN.get(0).output.longValue());
    // Reset the counter before repeating the search with a smaller queue.
    rejectCount.set(0);
    searcher = new Util.TopNSearcher<Long>(fst, 2, 5, minLongComparator) {

        @Override
        protected boolean acceptResult(IntsRef input, Long output) {
            // Same filter as the first searcher: accept only output 7.
            boolean accept = output.intValue() == 7;
            if (!accept) {
                rejectCount.incrementAndGet();
            }
            return accept;
        }
    };
    searcher.addStartPaths(fst.getFirstArc(new FST.Arc<Long>()), outputs.getNoOutput(), true, new IntsRefBuilder());
    res = searcher.search();
    assertEquals(4, rejectCount.get());
    // rejected(4) + topN(2) > maxQueueSize(5)
    assertFalse(res.isComplete);
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) TestUtil(org.apache.lucene.util.TestUtil) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) Arc(org.apache.lucene.util.fst.FST.Arc) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) IntsRef(org.apache.lucene.util.IntsRef) FSTTester.toIntsRef(org.apache.lucene.util.fst.FSTTester.toIntsRef) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

IntsRef (org.apache.lucene.util.IntsRef)63 BytesRef (org.apache.lucene.util.BytesRef)19 IntsRefBuilder (org.apache.lucene.util.IntsRefBuilder)19 HashSet (java.util.HashSet)16 ArrayList (java.util.ArrayList)13 Automaton (org.apache.lucene.util.automaton.Automaton)13 BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder)12 TokenStreamToAutomaton (org.apache.lucene.analysis.TokenStreamToAutomaton)9 IOException (java.io.IOException)7 Directory (org.apache.lucene.store.Directory)7 HashMap (java.util.HashMap)5 FiniteStringsIterator (org.apache.lucene.util.automaton.FiniteStringsIterator)5 BytesReader (org.apache.lucene.util.fst.FST.BytesReader)5 Pair (org.apache.lucene.util.fst.PairOutputs.Pair)5 ByteArrayInputStream (java.io.ByteArrayInputStream)4 FilterInputStream (java.io.FilterInputStream)4 InputStream (java.io.InputStream)4 Map (java.util.Map)4 Random (java.util.Random)4 TokenStream (org.apache.lucene.analysis.TokenStream)4