Search in sources :

Example 46 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class FSTTester method verifyUnPruned.

/**
 * Verifies an FST that is complete, i.e. one expected to accept exactly the terms in
 * {@code pairs} with exactly their outputs.
 *
 * <p>The checks run in this order: in-order lookup and {@code next()} enumeration of every
 * pair, reverse lookup by output (only when {@code doReverseLookup} is set), random accepted
 * words, random {@code seekExact}/{@code seekFloor}/{@code seekCeil} calls, and finally a
 * random mix of {@code next()} and seeks on a fresh enum.
 *
 * @param inputMode passed through to {@code inputToString} and {@code toIntsRef} to render and
 *     build terms
 * @param fst the FST under test; must be {@code null} when {@code pairs} is empty
 * @throws IOException propagated from the FST lookup and enum calls
 */
private void verifyUnPruned(int inputMode, FST<T> fst) throws IOException {
    final FST<Long> fstLong;
    final Set<Long> validOutputs;
    // Running min/max of all outputs; only updated when doReverseLookup is set.
    long minLong = Long.MAX_VALUE;
    long maxLong = Long.MIN_VALUE;
    if (doReverseLookup) {
        // Reverse lookup treats every output as a Long, so view the FST as FST<Long>.
        @SuppressWarnings("unchecked") FST<Long> fstLong0 = (FST<Long>) fst;
        fstLong = fstLong0;
        validOutputs = new HashSet<>();
        for (InputOutput<T> pair : pairs) {
            Long output = (Long) pair.output;
            maxLong = Math.max(maxLong, output);
            minLong = Math.min(minLong, output);
            validOutputs.add(output);
        }
    } else {
        fstLong = null;
        validOutputs = null;
    }
    // With no pairs there must be no FST at all, and nothing further to verify.
    if (pairs.size() == 0) {
        assertNull(fst);
        return;
    }
    if (LuceneTestCase.VERBOSE) {
        System.out.println("TEST: now verify " + pairs.size() + " terms");
        for (InputOutput<T> pair : pairs) {
            assertNotNull(pair);
            assertNotNull(pair.input);
            assertNotNull(pair.output);
            System.out.println("  " + inputToString(inputMode, pair.input) + ": " + outputs.outputToString(pair.output));
        }
    }
    assertNotNull(fst);
    // visit the valid pairs in order: make sure every term is accepted with the
    // expected output, and that the enum's next() steps through them correctly
    if (LuceneTestCase.VERBOSE) {
        System.out.println("TEST: check valid terms/next()");
    }
    {
        IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<>(fst);
        for (InputOutput<T> pair : pairs) {
            IntsRef term = pair.input;
            if (LuceneTestCase.VERBOSE) {
                System.out.println("TEST: check term=" + inputToString(inputMode, term) + " output=" + fst.outputs.outputToString(pair.output));
            }
            // direct lookup of the term
            T output = run(fst, term, null);
            assertNotNull("term " + inputToString(inputMode, term) + " is not accepted", output);
            assertTrue(outputsEqual(pair.output, output));
            // verify enum's next
            IntsRefFSTEnum.InputOutput<T> t = fstEnum.next();
            assertNotNull(t);
            assertEquals("expected input=" + inputToString(inputMode, term) + " but fstEnum returned " + inputToString(inputMode, t.input), term, t.input);
            assertTrue(outputsEqual(pair.output, t.output));
        }
        // the enum must be exhausted once every pair has been returned
        assertNull(fstEnum.next());
    }
    // input -> output map, used below to validate randomly generated terms
    final Map<IntsRef, T> termsMap = new HashMap<>();
    for (InputOutput<T> pair : pairs) {
        termsMap.put(pair.input, pair.output);
    }
    if (doReverseLookup && maxLong > minLong) {
        // Do random lookups so we test null (output doesn't
        // exist) case:
        // values outside [minLong, maxLong] can never be a valid output
        assertNull(Util.getByOutput(fstLong, minLong - 7));
        assertNull(Util.getByOutput(fstLong, maxLong + 7));
        final int num = LuceneTestCase.atLeast(random, 100);
        for (int iter = 0; iter < num; iter++) {
            Long v = TestUtil.nextLong(random, minLong, maxLong);
            IntsRef input = Util.getByOutput(fstLong, v);
            // a non-null result is only allowed for values that really are outputs
            assertTrue(validOutputs.contains(v) || input == null);
        }
    }
    // find random matching word and make sure it's valid
    if (LuceneTestCase.VERBOSE) {
        System.out.println("TEST: verify random accepted terms");
    }
    final IntsRefBuilder scratch = new IntsRefBuilder();
    int num = LuceneTestCase.atLeast(random, 500);
    for (int iter = 0; iter < num; iter++) {
        // randomAcceptedWord leaves the word in scratch and returns its output
        T output = randomAcceptedWord(fst, scratch);
        assertTrue("accepted word " + inputToString(inputMode, scratch.get()) + " is not valid", termsMap.containsKey(scratch.get()));
        assertTrue(outputsEqual(termsMap.get(scratch.get()), output));
        if (doReverseLookup) {
            //System.out.println("lookup output=" + output + " outs=" + fst.outputs);
            // looking the output up again must lead back to the same input
            IntsRef input = Util.getByOutput(fstLong, (Long) output);
            assertNotNull(input);
            //System.out.println("  got " + Util.toBytesRef(input, new BytesRef()).utf8ToString());
            assertEquals(scratch.get(), input);
        }
    }
    // test IntsRefFSTEnum.seek:
    if (LuceneTestCase.VERBOSE) {
        System.out.println("TEST: verify seek");
    }
    IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<>(fst);
    num = LuceneTestCase.atLeast(random, 100);
    for (int iter = 0; iter < num; iter++) {
        if (LuceneTestCase.VERBOSE) {
            System.out.println("  iter=" + iter);
        }
        if (random.nextBoolean()) {
            // seek to term that doesn't exist:
            // keep drawing random terms until one is absent from pairs
            while (true) {
                final IntsRef term = toIntsRef(getRandomString(random), inputMode);
                // binarySearch relies on pairs being sorted; a negative result encodes the insertion point
                int pos = Collections.binarySearch(pairs, new InputOutput<T>(term, null));
                if (pos < 0) {
                    // decode the insertion point: index of the first pair greater than term
                    pos = -(pos + 1);
                    // ok doesn't exist
                    //System.out.println("  seek " + inputToString(inputMode, term));
                    final IntsRefFSTEnum.InputOutput<T> seekResult;
                    if (random.nextInt(3) == 0) {
                        if (LuceneTestCase.VERBOSE) {
                            System.out.println("  do non-exist seekExact term=" + inputToString(inputMode, term));
                        }
                        seekResult = fstEnum.seekExact(term);
                        // exact seek of a missing term must return null: force the null branch below
                        pos = -1;
                    } else if (random.nextBoolean()) {
                        if (LuceneTestCase.VERBOSE) {
                            System.out.println("  do non-exist seekFloor term=" + inputToString(inputMode, term));
                        }
                        seekResult = fstEnum.seekFloor(term);
                        // floor is the pair just before the insertion point (-1 if there is none)
                        pos--;
                    } else {
                        if (LuceneTestCase.VERBOSE) {
                            System.out.println("  do non-exist seekCeil term=" + inputToString(inputMode, term));
                        }
                        // ceil is the pair at the insertion point (pairs.size() if there is none)
                        seekResult = fstEnum.seekCeil(term);
                    }
                    if (pos != -1 && pos < pairs.size()) {
                        //System.out.println("    got " + inputToString(inputMode,seekResult.input) + " output=" + fst.outputs.outputToString(seekResult.output));
                        assertNotNull("got null but expected term=" + inputToString(inputMode, pairs.get(pos).input), seekResult);
                        if (LuceneTestCase.VERBOSE) {
                            System.out.println("    got " + inputToString(inputMode, seekResult.input));
                        }
                        assertEquals("expected " + inputToString(inputMode, pairs.get(pos).input) + " but got " + inputToString(inputMode, seekResult.input), pairs.get(pos).input, seekResult.input);
                        assertTrue(outputsEqual(pairs.get(pos).output, seekResult.output));
                    } else {
                        // seeked before start or beyond end
                        //System.out.println("seek=" + seekTerm);
                        assertNull("expected null but got " + (seekResult == null ? "null" : inputToString(inputMode, seekResult.input)), seekResult);
                        if (LuceneTestCase.VERBOSE) {
                            System.out.println("    got null");
                        }
                    }
                    break;
                }
            }
        } else {
            // seek to term that does exist:
            // every seek flavor must land exactly on the requested pair
            InputOutput<T> pair = pairs.get(random.nextInt(pairs.size()));
            final IntsRefFSTEnum.InputOutput<T> seekResult;
            if (random.nextInt(3) == 2) {
                if (LuceneTestCase.VERBOSE) {
                    System.out.println("  do exists seekExact term=" + inputToString(inputMode, pair.input));
                }
                seekResult = fstEnum.seekExact(pair.input);
            } else if (random.nextBoolean()) {
                if (LuceneTestCase.VERBOSE) {
                    System.out.println("  do exists seekFloor " + inputToString(inputMode, pair.input));
                }
                seekResult = fstEnum.seekFloor(pair.input);
            } else {
                if (LuceneTestCase.VERBOSE) {
                    System.out.println("  do exists seekCeil " + inputToString(inputMode, pair.input));
                }
                seekResult = fstEnum.seekCeil(pair.input);
            }
            assertNotNull(seekResult);
            assertEquals("got " + inputToString(inputMode, seekResult.input) + " but expected " + inputToString(inputMode, pair.input), pair.input, seekResult.input);
            assertTrue(outputsEqual(pair.output, seekResult.output));
        }
    }
    if (LuceneTestCase.VERBOSE) {
        System.out.println("TEST: mixed next/seek");
    }
    // test mixed next/seek
    num = LuceneTestCase.atLeast(random, 100);
    for (int iter = 0; iter < num; iter++) {
        if (LuceneTestCase.VERBOSE) {
            System.out.println("TEST: iter " + iter);
        }
        // reset:
        fstEnum = new IntsRefFSTEnum<>(fst);
        // index into pairs the enum is expected to be positioned on;
        // -1 = not positioned yet, pairs.size() = exhausted
        int upto = -1;
        while (true) {
            boolean isDone = false;
            if (upto == pairs.size() - 1 || random.nextBoolean()) {
                // next
                upto++;
                if (LuceneTestCase.VERBOSE) {
                    System.out.println("  do next");
                }
                isDone = fstEnum.next() == null;
            } else if (upto != -1 && upto < 0.75 * pairs.size() && random.nextBoolean()) {
                // seek forward to a term that does not exist; give up after 10 random draws
                int attempt = 0;
                for (; attempt < 10; attempt++) {
                    IntsRef term = toIntsRef(getRandomString(random), inputMode);
                    // only accept a missing term that sorts after the current position
                    if (!termsMap.containsKey(term) && term.compareTo(pairs.get(upto).input) > 0) {
                        int pos = Collections.binarySearch(pairs, new InputOutput<T>(term, null));
                        assert pos < 0;
                        // insertion point of the missing term
                        upto = -(pos + 1);
                        if (random.nextBoolean()) {
                            // floor lands on the pair just before the insertion point
                            upto--;
                            assertTrue(upto != -1);
                            if (LuceneTestCase.VERBOSE) {
                                System.out.println("  do non-exist seekFloor(" + inputToString(inputMode, term) + ")");
                            }
                            isDone = fstEnum.seekFloor(term) == null;
                        } else {
                            if (LuceneTestCase.VERBOSE) {
                                System.out.println("  do non-exist seekCeil(" + inputToString(inputMode, term) + ")");
                            }
                            isDone = fstEnum.seekCeil(term) == null;
                        }
                        break;
                    }
                }
                if (attempt == 10) {
                    // no suitable term found: nothing was done to the enum, so pick another action
                    continue;
                }
            } else {
                // seek to an existing term at or after the current position; the first branch
                // handles upto == pairs.size() - 1, so the nextInt bound here is at least 1
                final int inc = random.nextInt(pairs.size() - upto - 1);
                upto += inc;
                if (upto == -1) {
                    // enum not positioned yet and inc was 0: seek to the first pair
                    upto = 0;
                }
                if (random.nextBoolean()) {
                    if (LuceneTestCase.VERBOSE) {
                        System.out.println("  do seekCeil(" + inputToString(inputMode, pairs.get(upto).input) + ")");
                    }
                    isDone = fstEnum.seekCeil(pairs.get(upto).input) == null;
                } else {
                    if (LuceneTestCase.VERBOSE) {
                        System.out.println("  do seekFloor(" + inputToString(inputMode, pairs.get(upto).input) + ")");
                    }
                    isDone = fstEnum.seekFloor(pairs.get(upto).input) == null;
                }
            }
            if (LuceneTestCase.VERBOSE) {
                if (!isDone) {
                    System.out.println("    got " + inputToString(inputMode, fstEnum.current().input));
                } else {
                    System.out.println("    got null");
                }
            }
            if (upto == pairs.size()) {
                // moved past the last pair: the enum must report exhaustion, which ends this iteration
                assertTrue(isDone);
                break;
            } else {
                // otherwise the enum must sit exactly on pairs.get(upto)
                assertFalse(isDone);
                assertEquals(pairs.get(upto).input, fstEnum.current().input);
                assertTrue(outputsEqual(pairs.get(upto).output, fstEnum.current().output));
            /*
            if (upto < pairs.size()-1) {
            int tryCount = 0;
            while(tryCount < 10) {
            final IntsRef t = toIntsRef(getRandomString(), inputMode);
            if (pairs.get(upto).input.compareTo(t) < 0) {
            final boolean expected = t.compareTo(pairs.get(upto+1).input) < 0;
            if (LuceneTestCase.VERBOSE) {
            System.out.println("TEST: call beforeNext(" + inputToString(inputMode, t) + "); current=" + inputToString(inputMode, pairs.get(upto).input) + " next=" + inputToString(inputMode, pairs.get(upto+1).input) + " expected=" + expected);
            }
            assertEquals(expected, fstEnum.beforeNext(t));
            break;
            }
            tryCount++;
            }
            }
          */
            }
        }
    }
}
Also used : HashMap(java.util.HashMap) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) IntsRef(org.apache.lucene.util.IntsRef)

Example 47 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class TestSynonymGraphFilter method testPositionLengthAndType.

/**
 * Verifies token types and position lengths for synonyms with different word counts.
 *
 * <p>First decodes the raw FST entry for "usa" and checks that it holds three synonyms and
 * that the third one has four words; then checks the token streams produced by the
 * flattening analyzer for several inputs.
 */
public void testPositionLengthAndType() throws Exception {
    String testFile = "spider man, spiderman\n" + "usa,united states,u s a,united states of america";
    Analyzer analyzer = new MockAnalyzer(random());
    SolrSynonymParser parser = new SolrSynonymParser(true, true, analyzer);
    parser.parse(new StringReader(testFile));
    analyzer.close();
    SynonymMap map = parser.build();
    analyzer = getFlattenAnalyzer(parser, true);
    BytesRef value = Util.get(map.fst, Util.toUTF32(new CharsRef("usa"), new IntsRefBuilder()));
    ByteArrayDataInput bytesReader = new ByteArrayDataInput(value.bytes, value.offset, value.length);
    // The low bit of the header vInt is dropped; the remaining bits are the synonym count.
    final int code = bytesReader.readVInt();
    final int count = code >>> 1;
    // Check the count before indexing synonymsIdxs[2] so that a wrong count fails as an
    // assertion rather than as an ArrayIndexOutOfBoundsException.
    assertEquals(3, count);
    final int[] synonymsIdxs = new int[count];
    for (int i = 0; i < count; i++) {
        synonymsIdxs[i] = bytesReader.readVInt();
    }
    BytesRef scratchBytes = new BytesRef();
    map.words.get(synonymsIdxs[2], scratchBytes);
    // Word count of the third synonym = number of separators + 1.
    int synonymLength = 1;
    for (int i = scratchBytes.offset; i < scratchBytes.offset + scratchBytes.length; i++) {
        if (scratchBytes.bytes[i] == SynonymMap.WORD_SEPARATOR) {
            synonymLength++;
        }
    }
    assertEquals(4, synonymLength);
    assertAnalyzesTo(analyzer, "spider man", new String[] { "spiderman", "spider", "man" }, new int[] { 0, 0, 7 }, new int[] { 10, 6, 10 }, new String[] { "SYNONYM", "word", "word" }, new int[] { 1, 0, 1 }, new int[] { 2, 1, 1 });
    assertAnalyzesToPositions(analyzer, "amazing spider man", new String[] { "amazing", "spiderman", "spider", "man" }, new String[] { "word", "SYNONYM", "word", "word" }, new int[] { 1, 1, 0, 1 }, new int[] { 1, 2, 1, 1 });
    // System.out.println(toDot(getAnalyzer(parser, true).tokenStream("field", new StringReader("the usa is wealthy"))));
    assertAnalyzesTo(analyzer, "the united states of america is wealthy", new String[] { "the", "usa", "united", "u", "united", "states", "s", "states", "a", "of", "america", "is", "wealthy" }, new int[] { 0, 4, 4, 4, 4, 11, 11, 11, 18, 18, 21, 29, 32 }, new int[] { 3, 28, 10, 10, 10, 28, 17, 17, 28, 20, 28, 31, 39 }, new String[] { "word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "word", "SYNONYM", "word", "word", "word", "word" }, new int[] { 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1 }, new int[] { 1, 4, 1, 1, 1, 3, 1, 1, 2, 1, 1, 1, 1 });
    assertAnalyzesToPositions(analyzer, "spiderman", new String[] { "spider", "spiderman", "man" }, new String[] { "SYNONYM", "word", "SYNONYM" }, new int[] { 1, 0, 1 }, new int[] { 1, 2, 1 });
    assertAnalyzesTo(analyzer, "spiderman enemies", new String[] { "spider", "spiderman", "man", "enemies" }, new int[] { 0, 0, 0, 10 }, new int[] { 9, 9, 9, 17 }, new String[] { "SYNONYM", "word", "SYNONYM", "word" }, new int[] { 1, 0, 1, 1 }, new int[] { 1, 2, 1, 1 });
    assertAnalyzesTo(analyzer, "the usa is wealthy", new String[] { "the", "united", "u", "united", "usa", "states", "s", "states", "a", "of", "america", "is", "wealthy" }, new int[] { 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 11 }, new int[] { 3, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, 18 }, new String[] { "word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "word", "word" }, new int[] { 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1 }, new int[] { 1, 1, 1, 1, 4, 3, 1, 1, 2, 1, 1, 1, 1 });
    assertGraphStrings(analyzer, "the usa is wealthy", new String[] { "the usa is wealthy", "the united states is wealthy", "the u s a is wealthy", "the united states of america is wealthy", // Wrong. Here only due to "sausagization" of the multi word synonyms.
    "the u states is wealthy", "the u states a is wealthy", "the u s of america is wealthy", "the u states of america is wealthy", "the united s a is wealthy", "the united states a is wealthy", "the united s of america is wealthy" });
    assertAnalyzesTo(analyzer, "the united states is wealthy", new String[] { "the", "usa", "u", "united", "united", "s", "states", "states", "a", "of", "america", "is", "wealthy" }, new int[] { 0, 4, 4, 4, 4, 11, 11, 11, 11, 11, 11, 18, 21 }, new int[] { 3, 17, 10, 10, 10, 17, 17, 17, 17, 17, 17, 20, 28 }, new String[] { "word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "word" }, new int[] { 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1 }, new int[] { 1, 4, 1, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1 }, false);
    assertAnalyzesTo(analyzer, "the united states of balance", new String[] { "the", "usa", "u", "united", "united", "s", "states", "states", "a", "of", "america", "of", "balance" }, new int[] { 0, 4, 4, 4, 4, 11, 11, 11, 11, 11, 11, 18, 21 }, new int[] { 3, 17, 10, 10, 10, 17, 17, 17, 17, 17, 17, 20, 28 }, new String[] { "word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "word" }, new int[] { 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1 }, new int[] { 1, 4, 1, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1 });
    analyzer.close();
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) StringReader(java.io.StringReader) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) ByteArrayDataInput(org.apache.lucene.store.ByteArrayDataInput) BytesRef(org.apache.lucene.util.BytesRef) CharsRef(org.apache.lucene.util.CharsRef)

Example 48 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class TestFSTs method testFinalOutputOnEndState.

/**
 * Builds a two-entry FST ("stat" -> 17, "station" -> 10), renders it with
 * {@code Util.toDot} and checks that the rendering contains the label {@code t/[7]}.
 */
public void testFinalOutputOnEndState() throws Exception {
    final PositiveIntOutputs longOutputs = PositiveIntOutputs.getSingleton();
    final Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE4, 2, 0, true, true, Integer.MAX_VALUE, longOutputs, true, 15);
    // "stat" is a prefix of "station" and carries the larger output.
    fstBuilder.add(Util.toUTF32("stat", new IntsRefBuilder()), 17L);
    fstBuilder.add(Util.toUTF32("station", new IntsRefBuilder()), 10L);
    final FST<Long> built = fstBuilder.finish();

    // Render the graph into memory and inspect the text.
    final StringWriter dotWriter = new StringWriter();
    Util.toDot(built, dotWriter, false, false);
    dotWriter.close();
    final String dot = dotWriter.toString();
    // NOTE(review): the 7 is presumably the 17 - 10 remainder kept as a final output on the "t" arc; confirm.
    assertTrue(dot.contains("label=\"t/[7]\""));
}
Also used : StringWriter(java.io.StringWriter) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder)

Example 49 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class TestFSTs method testLargeOutputsOnArrayArcs.

/**
 * Adds several single-label inputs, each with a large byte[] output whose first byte equals
 * the label, then looks every input up again and checks that the output comes back intact.
 */
public void testLargeOutputsOnArrayArcs() throws Exception {
    // Shared by the build and verify phases so the two cannot drift apart.
    final int arcCount = 6;
    final int outputLength = 300;
    final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
    final Builder<BytesRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
    final byte[] bytes = new byte[outputLength];
    final IntsRefBuilder input = new IntsRefBuilder();
    input.append(0);
    final BytesRef output = new BytesRef(bytes);
    for (int arc = 0; arc < arcCount; arc++) {
        input.setIntAt(0, arc);
        output.bytes[0] = (byte) arc;
        // output is reused across iterations, so hand the builder its own copy
        builder.add(input.get(), BytesRef.deepCopyOf(output));
    }
    final FST<BytesRef> fst = builder.finish();
    for (int arc = 0; arc < arcCount; arc++) {
        input.setIntAt(0, arc);
        final BytesRef result = Util.get(fst, input.get());
        assertNotNull(result);
        assertEquals(outputLength, result.length);
        // expected value first, so a failure message reports the values the right way round
        assertEquals(arc, result.bytes[result.offset]);
        // every byte after the first was never written and must still be zero
        for (int byteIDX = 1; byteIDX < result.length; byteIDX++) {
            assertEquals(0, result.bytes[result.offset + byteIDX]);
        }
    }
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) BytesRef(org.apache.lucene.util.BytesRef)

Example 50 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class TestFSTs method testShortestPathsWFSTRandom.

/**
 * Variant of testShortestPathsRandom that uses PairOutputs, so every entry carries both a
 * weight and an output. For each proper prefix of each random word, the top-N completions
 * returned by {@code Util.shortestPaths} are compared against a brute-force scan of a TreeMap.
 */
public void testShortestPathsWFSTRandom() throws Exception {
    final int wordCount = atLeast(1000);
    final TreeMap<String, TwoLongs> expectedByWord = new TreeMap<>();
    final TreeSet<String> prefixes = new TreeSet<>();
    // first component is the weight, second component is the output
    final PairOutputs<Long, Long> pairOutputs = new PairOutputs<>(PositiveIntOutputs.getSingleton(), PositiveIntOutputs.getSingleton());
    final Builder<Pair<Long, Long>> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1, pairOutputs);
    final IntsRefBuilder scratchInts = new IntsRefBuilder();
    final Random rnd = random();

    for (int wordIdx = 0; wordIdx < wordCount; wordIdx++) {
        // draw until we get a word we have not used yet
        String word;
        do {
            word = TestUtil.randomSimpleString(rnd);
        } while (expectedByWord.containsKey(word));

        // record every proper, non-empty prefix of the word
        for (int end = 1; end < word.length(); end++) {
            prefixes.add(word.substring(0, end));
        }

        final int weight = TestUtil.nextInt(rnd, 1, 100); // weights 1..100
        final int output = TestUtil.nextInt(rnd, 0, 500); // outputs 0..500
        expectedByWord.put(word, new TwoLongs(weight, output));
    }

    // the TreeMap iterates in key order, which is the order the entries are added to the builder
    for (Map.Entry<String, TwoLongs> entry : expectedByWord.entrySet()) {
        final long weight = entry.getValue().a;
        final long output = entry.getValue().b;
        fstBuilder.add(Util.toIntsRef(new BytesRef(entry.getKey()), scratchInts), pairOutputs.newPair(weight, output));
    }
    final FST<Pair<Long, Long>> fst = fstBuilder.finish();
    final BytesReader fstReader = fst.getBytesReader();

    for (String prefix : prefixes) {
        // Step 1: walk the prefix through the FST, accumulating the output seen along the way.
        Pair<Long, Long> prefixOutput = pairOutputs.getNoOutput();
        final FST.Arc<Pair<Long, Long>> arc = fst.getFirstArc(new FST.Arc<Pair<Long, Long>>());
        for (int idx = 0; idx < prefix.length(); idx++) {
            final int label = (int) prefix.charAt(idx);
            if (fst.findTargetArc(label, arc, arc, fstReader) == null) {
                fail();
            }
            prefixOutput = pairOutputs.add(prefixOutput, arc.output);
        }

        final int topN = TestUtil.nextInt(rnd, 1, 10);
        final Util.TopResults<Pair<Long, Long>> actual = Util.shortestPaths(fst, arc, fst.outputs.getNoOutput(), minPairWeightComparator, topN, true);
        assertTrue(actual.isComplete);

        // Step 2: brute-force the expected completions by scanning every known word.
        final List<Result<Pair<Long, Long>>> expected = new ArrayList<>();
        for (Map.Entry<String, TwoLongs> entry : expectedByWord.entrySet()) {
            final String word = entry.getKey();
            if (!word.startsWith(prefix)) {
                continue;
            }
            // suffix after the prefix, paired with what remains of the weight/output
            final String suffix = word.substring(prefix.length());
            final TwoLongs value = entry.getValue();
            expected.add(new Result<>(Util.toIntsRef(new BytesRef(suffix), new IntsRefBuilder()), pairOutputs.newPair(value.a - prefixOutput.output1, value.b - prefixOutput.output2)));
        }
        assertTrue(expected.size() > 0);

        // keep only the best topN expected results
        Collections.sort(expected, new TieBreakByInputComparator<>(minPairWeightComparator));
        if (expected.size() > topN) {
            expected.subList(topN, expected.size()).clear();
        }

        assertEquals(expected.size(), actual.topN.size());
        for (int hit = 0; hit < actual.topN.size(); hit++) {
            final Result<Pair<Long, Long>> want = expected.get(hit);
            final Result<Pair<Long, Long>> got = actual.topN.get(hit);
            assertEquals(want.input, got.input);
            assertEquals(want.output, got.output);
        }
    }
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) ArrayList(java.util.ArrayList) TestUtil(org.apache.lucene.util.TestUtil) FSTTester.simpleRandomString(org.apache.lucene.util.fst.FSTTester.simpleRandomString) FSTTester.getRandomString(org.apache.lucene.util.fst.FSTTester.getRandomString) Result(org.apache.lucene.util.fst.Util.Result) Random(java.util.Random) TreeSet(java.util.TreeSet) BytesRef(org.apache.lucene.util.BytesRef) Pair(org.apache.lucene.util.fst.PairOutputs.Pair) TreeMap(java.util.TreeMap) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) BytesReader(org.apache.lucene.util.fst.FST.BytesReader) Map(java.util.Map) TreeMap(java.util.TreeMap)

Aggregations

IntsRefBuilder (org.apache.lucene.util.IntsRefBuilder)55 BytesRef (org.apache.lucene.util.BytesRef)32 BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder)25 IntsRef (org.apache.lucene.util.IntsRef)19 ArrayList (java.util.ArrayList)10 HashSet (java.util.HashSet)10 Builder (org.apache.lucene.util.fst.Builder)10 Arc (org.apache.lucene.util.fst.FST.Arc)9 BytesReader (org.apache.lucene.util.fst.FST.BytesReader)8 Map (java.util.Map)7 HashMap (java.util.HashMap)5 ByteArrayDataInput (org.apache.lucene.store.ByteArrayDataInput)5 CharsRef (org.apache.lucene.util.CharsRef)5 TestUtil (org.apache.lucene.util.TestUtil)5 FSTTester.getRandomString (org.apache.lucene.util.fst.FSTTester.getRandomString)5 FSTTester.simpleRandomString (org.apache.lucene.util.fst.FSTTester.simpleRandomString)5 TreeMap (java.util.TreeMap)4 CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder)4 Pair (org.apache.lucene.util.fst.PairOutputs.Pair)4 IOException (java.io.IOException)3