Search in sources :

Example 21 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class FiniteStringsIteratorTest method testFiniteStringsEatsStack.

/**
 * Builds the union of two automata, each accepting one random string generated
 * into a 50000-element char buffer, and checks that the iterator enumerates
 * exactly those two strings.
 */
public void testFiniteStringsEatsStack() {
    final char[] buffer = new char[50000];
    TestUtil.randomFixedLengthUnicodeString(random(), buffer, 0, buffer.length);
    final String first = new String(buffer);
    // The buffer is refilled in place; 'first' already holds its own copy.
    TestUtil.randomFixedLengthUnicodeString(random(), buffer, 0, buffer.length);
    final String second = new String(buffer);

    final Automaton union = Operations.union(Automata.makeString(first), Automata.makeString(second));
    final List<IntsRef> found = getFiniteStrings(new FiniteStringsIterator(union));
    assertEquals(2, found.size());

    // One scratch builder is reused for both lookups, as each conversion
    // is consumed by contains() before the next one overwrites it.
    final IntsRefBuilder codePoints = new IntsRefBuilder();
    for (String expected : new String[] { first, second }) {
        Util.toUTF32(expected.toCharArray(), 0, expected.length(), codePoints);
        assertTrue(found.contains(codePoints.get()));
    }
}
Also used : IntsRef(org.apache.lucene.util.IntsRef) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder)

Example 22 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class FiniteStringsIteratorTest method testShortAccept.

/**
 * Checks that when one accepted string ("x") is a prefix of another ("xy"),
 * the iterator over the minimized union still reports both strings.
 */
public void testShortAccept() {
    final Automaton union = Operations.union(Automata.makeString("x"), Automata.makeString("xy"));
    final Automaton minimized = MinimizationOperations.minimize(union, DEFAULT_MAX_DETERMINIZED_STATES);

    final List<IntsRef> found = getFiniteStrings(new FiniteStringsIterator(minimized));
    assertEquals(2, found.size());

    for (String expected : new String[] { "x", "xy" }) {
        // A fresh builder per expected string, mirroring one builder per lookup.
        final IntsRefBuilder ints = new IntsRefBuilder();
        Util.toIntsRef(new BytesRef(expected), ints);
        assertTrue(found.contains(ints.get()));
    }
}
Also used : IntsRef(org.apache.lucene.util.IntsRef) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) BytesRef(org.apache.lucene.util.BytesRef)

Example 23 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class FiniteStringsIteratorTest method testSingletonNoLimit.

/**
 * Checks that an automaton built from the single string "foobar" enumerates
 * exactly one finite string, equal to "foobar" converted via Util.toUTF32.
 */
public void testSingletonNoLimit() {
    final List<IntsRef> found =
        getFiniteStrings(new FiniteStringsIterator(Automata.makeString("foobar")));
    assertEquals(1, found.size());

    final char[] expectedChars = "foobar".toCharArray();
    final IntsRefBuilder expected = new IntsRefBuilder();
    // expectedChars.length is 6, i.e. the whole of "foobar".
    Util.toUTF32(expectedChars, 0, expectedChars.length, expected);
    assertTrue(found.contains(expected.get()));
}
Also used : IntsRef(org.apache.lucene.util.IntsRef) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder)

Example 24 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class FiniteStringsIteratorTest method testRandomFiniteStrings1.

/**
 * Generates a set of distinct random simple strings, unions their automata,
 * optionally applies one random transformation (minimize, determinize or
 * removeDeadStates), and checks that the iterator enumerates exactly the
 * generated set. On mismatch, both sorted lists are printed before failing.
 */
public void testRandomFiniteStrings1() {
    final int numStrings = atLeast(100);
    if (VERBOSE) {
        System.out.println("TEST: numStrings=" + numStrings);
    }

    final Set<IntsRef> expected = new HashSet<>();
    final List<Automaton> parts = new ArrayList<>();
    final IntsRefBuilder codePoints = new IntsRefBuilder();
    for (int n = 0; n < numStrings; n++) {
        final String candidate = TestUtil.randomSimpleString(random(), 1, 200);
        Util.toUTF32(candidate.toCharArray(), 0, candidate.length(), codePoints);
        // toIntsRef() is used (not get()) because the value is stored in the set
        // while the builder keeps being reused.
        if (!expected.add(codePoints.toIntsRef())) {
            // Duplicate string: no automaton is added for it.
            continue;
        }
        parts.add(Automata.makeString(candidate));
        if (VERBOSE) {
            System.out.println("  add string=" + candidate);
        }
    }

    // TODO: we could sometimes use
    // DaciukMihovAutomatonBuilder here
    // TODO: what other random things can we do here...
    Automaton a = Operations.union(parts);

    // The chain below draws up to three random booleans, stopping at the first
    // true one; at most one transformation is applied, possibly none.
    if (random().nextBoolean()) {
        a = MinimizationOperations.minimize(a, 1000000);
        if (VERBOSE) {
            System.out.println("TEST: a.minimize numStates=" + a.getNumStates());
        }
    } else if (random().nextBoolean()) {
        if (VERBOSE) {
            System.out.println("TEST: a.determinize");
        }
        a = Operations.determinize(a, 1000000);
    } else if (random().nextBoolean()) {
        if (VERBOSE) {
            System.out.println("TEST: a.removeDeadStates");
        }
        a = Operations.removeDeadStates(a);
    }

    final List<IntsRef> actual = getFiniteStrings(new FiniteStringsIterator(a));
    assertFiniteStringsRecursive(a, actual);

    if (expected.equals(new HashSet<>(actual))) {
        return;
    }

    // Mismatch: dump both sides, sorted, pairwise up to the shorter length.
    System.out.println("strings.size()=" + expected.size() + " actual.size=" + actual.size());
    final List<IntsRef> sortedExpected = new ArrayList<>(expected);
    Collections.sort(sortedExpected);
    final List<IntsRef> sortedActual = new ArrayList<>(actual);
    Collections.sort(sortedActual);
    final int shared = Math.min(sortedExpected.size(), sortedActual.size());
    for (int idx = 0; idx < shared; idx++) {
        System.out.println("  i=" + idx + " string=" + toString(sortedExpected.get(idx)) + " actual=" + toString(sortedActual.get(idx)));
    }
    fail("wrong strings found");
}
Also used : ArrayList(java.util.ArrayList) IntsRef(org.apache.lucene.util.IntsRef) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) HashSet(java.util.HashSet)

Example 25 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class TestAutomaton method testMakeBinaryIntervalRandom.

/**
 * For random binary intervals, checks that the automaton returned by
 * makeBinaryInterval accepts a random term exactly when the term lies within
 * the bounds, taking the inclusiveness of each bound into account.
 */
public void testMakeBinaryIntervalRandom() throws Exception {
    final int rounds = atLeast(100);
    for (int round = 0; round < rounds; round++) {
        final BytesRef lower = TestUtil.randomBinaryTerm(random());
        final boolean lowerInclusive = random().nextBoolean();
        final BytesRef upper = TestUtil.randomBinaryTerm(random());
        final boolean upperInclusive = random().nextBoolean();
        final Automaton interval = makeBinaryInterval(lower, lowerInclusive, upper, upperInclusive);

        for (int probe = 0; probe < 500; probe++) {
            final BytesRef term = TestUtil.randomBinaryTerm(random());
            final int lowerCmp = lower.compareTo(term);
            final int upperCmp = upper.compareTo(term);

            // Outside the bounds is always rejected; a term equal to a bound is
            // accepted only if that bound is inclusive; anything else is accepted.
            final boolean withinBounds = lowerCmp <= 0 && upperCmp >= 0;
            final boolean lowerAllows = lowerCmp != 0 || lowerInclusive;
            final boolean upperAllows = upperCmp != 0 || upperInclusive;
            final boolean expected = withinBounds && lowerAllows && upperAllows;

            if (VERBOSE) {
                System.out.println("  check term=" + term + " expected=" + expected);
            }

            final IntsRefBuilder termInts = new IntsRefBuilder();
            Util.toIntsRef(term, termInts);
            assertEquals(expected, Operations.run(interval, termInts.toIntsRef()));
        }
    }
}
Also used : IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

IntsRefBuilder (org.apache.lucene.util.IntsRefBuilder) 55, BytesRef (org.apache.lucene.util.BytesRef) 32, BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder) 25, IntsRef (org.apache.lucene.util.IntsRef) 19, ArrayList (java.util.ArrayList) 10, HashSet (java.util.HashSet) 10, Builder (org.apache.lucene.util.fst.Builder) 10, Arc (org.apache.lucene.util.fst.FST.Arc) 9, BytesReader (org.apache.lucene.util.fst.FST.BytesReader) 8, Map (java.util.Map) 7, HashMap (java.util.HashMap) 5, ByteArrayDataInput (org.apache.lucene.store.ByteArrayDataInput) 5, CharsRef (org.apache.lucene.util.CharsRef) 5, TestUtil (org.apache.lucene.util.TestUtil) 5, FSTTester.getRandomString (org.apache.lucene.util.fst.FSTTester.getRandomString) 5, FSTTester.simpleRandomString (org.apache.lucene.util.fst.FSTTester.simpleRandomString) 5, TreeMap (java.util.TreeMap) 4, CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder) 4, Pair (org.apache.lucene.util.fst.PairOutputs.Pair) 4, IOException (java.io.IOException) 3