Search in sources :

Example 16 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class FiniteStringsIteratorTest method testSingletonNoLimit.

/** Enumerating an automaton built from one string must yield exactly that string. */
public void testSingletonNoLimit() {
    final String text = "foobar";
    FiniteStringsIterator iterator = new FiniteStringsIterator(Automata.makeString(text));
    List<IntsRef> enumerated = getFiniteStrings(iterator);
    assertEquals(1, enumerated.size());

    // Encode the expected string the same way the automaton labels are compared.
    char[] chars = text.toCharArray();
    IntsRefBuilder expected = new IntsRefBuilder();
    Util.toUTF32(chars, 0, chars.length, expected);
    assertTrue(enumerated.contains(expected.get()));
}
Also used : IntsRef(org.apache.lucene.util.IntsRef) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder)

Example 17 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class FiniteStringsIteratorTest method testRandomFiniteStrings1.

/**
 * Builds the union of single-string automata for a random set of distinct strings,
 * optionally transforms it, and checks that iterating the finite strings of the
 * result gives back exactly the original set.
 */
public void testRandomFiniteStrings1() {
    final int count = atLeast(100);
    if (VERBOSE) {
        System.out.println("TEST: numStrings=" + count);
    }

    Set<IntsRef> expected = new HashSet<>();
    List<Automaton> parts = new ArrayList<>();
    // One scratch builder is reused for every generated string.
    IntsRefBuilder utf32 = new IntsRefBuilder();
    for (int n = 0; n < count; n++) {
        String candidate = TestUtil.randomSimpleString(random(), 1, 200);
        Util.toUTF32(candidate.toCharArray(), 0, candidate.length(), utf32);
        if (!expected.add(utf32.toIntsRef())) {
            // Duplicate string: already represented in the union.
            continue;
        }
        parts.add(Automata.makeString(candidate));
        if (VERBOSE) {
            System.out.println("  add string=" + candidate);
        }
    }

    // TODO: we could sometimes use
    // DaciukMihovAutomatonBuilder here
    // TODO: what other random things can we do here...
    Automaton union = Operations.union(parts);

    // Randomly pick at most one transformation; each coin flip is only taken
    // when the previous one came up false.
    if (random().nextBoolean()) {
        union = MinimizationOperations.minimize(union, 1000000);
        if (VERBOSE) {
            System.out.println("TEST: a.minimize numStates=" + union.getNumStates());
        }
    } else if (random().nextBoolean()) {
        if (VERBOSE) {
            System.out.println("TEST: a.determinize");
        }
        union = Operations.determinize(union, 1000000);
    } else if (random().nextBoolean()) {
        if (VERBOSE) {
            System.out.println("TEST: a.removeDeadStates");
        }
        union = Operations.removeDeadStates(union);
    }

    List<IntsRef> found = getFiniteStrings(new FiniteStringsIterator(union));
    assertFiniteStringsRecursive(union, found);

    if (expected.equals(new HashSet<>(found))) {
        return;
    }

    // Mismatch: dump both sides in sorted order, pairwise, before failing.
    System.out.println("strings.size()=" + expected.size() + " actual.size=" + found.size());
    List<IntsRef> sortedExpected = new ArrayList<>(expected);
    Collections.sort(sortedExpected);
    List<IntsRef> sortedFound = new ArrayList<>(found);
    Collections.sort(sortedFound);
    final int limit = Math.min(sortedExpected.size(), sortedFound.size());
    for (int idx = 0; idx < limit; idx++) {
        String left = toString(sortedExpected.get(idx));
        String right = toString(sortedFound.get(idx));
        System.out.println("  i=" + idx + " string=" + left + " actual=" + right);
    }
    fail("wrong strings found");
}
Also used : ArrayList(java.util.ArrayList) IntsRef(org.apache.lucene.util.IntsRef) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) HashSet(java.util.HashSet)

Example 18 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class TestAutomaton method testMakeBinaryIntervalRandom.

/**
 * For random binary intervals, checks that the automaton produced by
 * {@code makeBinaryInterval} accepts a random term exactly when the term lies
 * inside the interval, honouring the inclusive flags on each end.
 */
public void testMakeBinaryIntervalRandom() throws Exception {
    final int rounds = atLeast(100);
    for (int round = 0; round < rounds; round++) {
        BytesRef lower = TestUtil.randomBinaryTerm(random());
        boolean lowerInclusive = random().nextBoolean();
        BytesRef upper = TestUtil.randomBinaryTerm(random());
        boolean upperInclusive = random().nextBoolean();
        Automaton interval = makeBinaryInterval(lower, lowerInclusive, upper, upperInclusive);

        for (int probe = 0; probe < 500; probe++) {
            BytesRef term = TestUtil.randomBinaryTerm(random());
            int lowerCmp = lower.compareTo(term);
            int upperCmp = upper.compareTo(term);

            // Inside the range, and any endpoint the term sits on must be inclusive.
            boolean withinBounds = lowerCmp <= 0 && upperCmp >= 0;
            boolean lowerOk = lowerCmp != 0 || lowerInclusive;
            boolean upperOk = upperCmp != 0 || upperInclusive;
            boolean expected = withinBounds && lowerOk && upperOk;

            if (VERBOSE) {
                System.out.println("  check term=" + term + " expected=" + expected);
            }
            IntsRefBuilder termInts = new IntsRefBuilder();
            Util.toIntsRef(term, termInts);
            boolean accepted = Operations.run(interval, termInts.toIntsRef());
            assertEquals(expected, accepted);
        }
    }
}
Also used : IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) BytesRef(org.apache.lucene.util.BytesRef)

Example 19 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class Operations method getSingleton.

/**
 * Returns the one input accepted by the automaton, or {@code null} when the
 * walk from state 0 does not end in exactly one accepted string.
 *
 * <p>The walk follows the only outgoing transition of each non-accepting state
 * as long as that transition carries a single label and leads to a state not
 * visited before; it succeeds on reaching an accepting state with no outgoing
 * transitions.
 *
 * @param a the automaton to inspect; must be deterministic
 * @return the labels collected along the walk, or {@code null}
 * @throws IllegalArgumentException if {@code a} is not deterministic
 */
public static IntsRef getSingleton(Automaton a) {
    if (!a.isDeterministic()) {
        throw new IllegalArgumentException("input automaton must be deterministic");
    }
    IntsRefBuilder path = new IntsRefBuilder();
    HashSet<Integer> seen = new HashSet<>();
    Transition edge = new Transition();
    int state = 0;
    for (;;) {
        seen.add(state);
        if (a.isAccept(state)) {
            // An accepting state with further transitions does not qualify.
            return a.getNumTransitions(state) == 0 ? path.get() : null;
        }
        if (a.getNumTransitions(state) != 1) {
            // Dead end or branching.
            return null;
        }
        a.getTransition(state, 0, edge);
        if (edge.min != edge.max || seen.contains(edge.dest)) {
            // Label range wider than one symbol, or a cycle back to a visited state.
            return null;
        }
        path.append(edge.min);
        state = edge.dest;
    }
}
Also used : IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) HashSet(java.util.HashSet)

Example 20 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class BaseSynonymParserTestCase method assertEntryAbsent.

/**
 * Asserts that the synonym map holds no entry for the given word.
 *
 * @param synonynMap  the synonym map generated by parsing
 * @param word        the word (or phrase) to look up; should be the value that comes out of the
 *                    analyzer. Every space is replaced by the word separator before the lookup.
 * @throws IOException declared by this method; presumably raised by the FST lookup
 */
public static void assertEntryAbsent(SynonymMap synonynMap, String word) throws IOException {
    final String key = word.replace(' ', SynonymMap.WORD_SEPARATOR);
    final CharsRef keyChars = new CharsRef(key);
    final BytesRef synonyms = Util.get(synonynMap.fst, Util.toUTF32(keyChars, new IntsRefBuilder()));
    assertNull("There should be no synonyms for: " + key, synonyms);
}
Also used : IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) BytesRef(org.apache.lucene.util.BytesRef) CharsRef(org.apache.lucene.util.CharsRef)

Aggregations

IntsRefBuilder (org.apache.lucene.util.IntsRefBuilder)55 BytesRef (org.apache.lucene.util.BytesRef)32 BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder)25 IntsRef (org.apache.lucene.util.IntsRef)19 ArrayList (java.util.ArrayList)10 HashSet (java.util.HashSet)10 Builder (org.apache.lucene.util.fst.Builder)10 Arc (org.apache.lucene.util.fst.FST.Arc)9 BytesReader (org.apache.lucene.util.fst.FST.BytesReader)8 Map (java.util.Map)7 HashMap (java.util.HashMap)5 ByteArrayDataInput (org.apache.lucene.store.ByteArrayDataInput)5 CharsRef (org.apache.lucene.util.CharsRef)5 TestUtil (org.apache.lucene.util.TestUtil)5 FSTTester.getRandomString (org.apache.lucene.util.fst.FSTTester.getRandomString)5 FSTTester.simpleRandomString (org.apache.lucene.util.fst.FSTTester.simpleRandomString)5 TreeMap (java.util.TreeMap)4 CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder)4 Pair (org.apache.lucene.util.fst.PairOutputs.Pair)4 IOException (java.io.IOException)3