Search in sources :

Example 11 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class TestFSTs method testIllegallyModifyRootArc.

/**
 * Builds an FST over 100 one-character terms (plus, for every prefix except "m", 20 random
 * prefixed terms), looks up the 'm' arc from the root, illegally truncates that arc's output,
 * and then repeats the lookup. An {@link AssertionError} raised by the second lookup is
 * tolerated; the test only runs when assertions are enabled.
 */
public void testIllegallyModifyRootArc() throws Exception {
    assumeTrue("test relies on assertions", assertsAreEnabled);

    // Collect the terms; the set removes any accidental duplicates.
    Set<BytesRef> uniqueTerms = new HashSet<>();
    for (int offset = 0; offset < 100; offset++) {
        String prefix = Character.toString((char) ('a' + offset));
        uniqueTerms.add(new BytesRef(prefix));
        if (prefix.equals("m")) {
            // "m" stays a lone term so its root arc carries the whole output.
            continue;
        }
        for (int n = 0; n < 20; n++) {
            // Make a big enough FST that the root cache will be created:
            String suffix = TestUtil.randomRealisticUnicodeString(random(), 10, 20);
            uniqueTerms.add(new BytesRef(prefix + suffix));
        }
    }

    // Terms are sorted before being fed to the builder.
    List<BytesRef> sortedTerms = new ArrayList<>(uniqueTerms);
    Collections.sort(sortedTerms);

    ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
    Builder<BytesRef> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
    IntsRefBuilder scratchInput = new IntsRefBuilder();
    for (BytesRef term : sortedTerms) {
        Util.toIntsRef(term, scratchInput);
        // Each term maps to itself as its output.
        fstBuilder.add(scratchInput.get(), term);
    }
    FST<BytesRef> fst = fstBuilder.finish();

    // First lookup: the 'm' arc must exist and carry "m" as its output.
    Arc<BytesRef> scratchArc = new FST.Arc<>();
    fst.getFirstArc(scratchArc);
    FST.BytesReader reader = fst.getBytesReader();
    scratchArc = fst.findTargetArc((int) 'm', scratchArc, scratchArc, reader);
    assertNotNull(scratchArc);
    assertEquals(new BytesRef("m"), scratchArc.output);

    // NOTE: illegal: mutate the output object handed back by the FST.
    scratchArc.output.length = 0;

    // Second lookup from the root; an assertion failure here is acceptable.
    fst.getFirstArc(scratchArc);
    try {
        scratchArc = fst.findTargetArc((int) 'm', scratchArc, scratchArc, reader);
    } catch (AssertionError expected) {
        // expected
    }
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) ArrayList(java.util.ArrayList) BytesReader(org.apache.lucene.util.fst.FST.BytesReader) FSTTester.simpleRandomString(org.apache.lucene.util.fst.FSTTester.simpleRandomString) FSTTester.getRandomString(org.apache.lucene.util.fst.FSTTester.getRandomString) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) Arc(org.apache.lucene.util.fst.FST.Arc) BytesRef(org.apache.lucene.util.BytesRef) HashSet(java.util.HashSet)

Example 12 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class TestFSTs method testShortestPathsRandom.

/**
 * Randomized check of {@code Util.shortestPaths}: builds an FST from random unique words with
 * weights in 1..100, then for every proper prefix of every word compares the top-N completions
 * reported by the FST against a brute-force scan of a {@link TreeMap} holding the same data.
 */
public void testShortestPathsRandom() throws Exception {
    final Random random = random();
    int numWords = atLeast(1000);

    // Reference model (word -> weight) and every proper prefix of every word.
    final TreeMap<String, Long> slowCompletor = new TreeMap<>();
    final TreeSet<String> allPrefixes = new TreeSet<>();

    final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
    final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
    final IntsRefBuilder scratch = new IntsRefBuilder();

    for (int wordIdx = 0; wordIdx < numWords; wordIdx++) {
        // Keep drawing until we get a word we have not seen yet.
        String word;
        do {
            word = TestUtil.randomSimpleString(random);
        } while (slowCompletor.containsKey(word));

        for (int end = 1; end < word.length(); end++) {
            allPrefixes.add(word.substring(0, end));
        }

        // weights 1..100
        slowCompletor.put(word, (long) TestUtil.nextInt(random, 1, 100));
    }

    // TreeMap iteration is sorted by key, so words are added to the builder in order.
    for (Map.Entry<String, Long> entry : slowCompletor.entrySet()) {
        builder.add(Util.toIntsRef(new BytesRef(entry.getKey()), scratch), entry.getValue());
    }
    final FST<Long> fst = builder.finish();

    // Debugging aid: dump the FST with
    //   Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"));
    //   Util.toDot(fst, w, false, false);
    //   w.close();

    BytesReader reader = fst.getBytesReader();

    for (String prefix : allPrefixes) {
        // 1. run prefix against fst, then complete by value
        long prefixOutput = 0;
        FST.Arc<Long> arc = fst.getFirstArc(new FST.Arc<Long>());
        for (char label : prefix.toCharArray()) {
            if (fst.findTargetArc((int) label, arc, arc, reader) == null) {
                fail();
            }
            // Accumulate the output consumed while walking the prefix.
            prefixOutput += arc.output;
        }

        final int topN = TestUtil.nextInt(random, 1, 10);
        Util.TopResults<Long> r =
            Util.shortestPaths(fst, arc, fst.outputs.getNoOutput(), minLongComparator, topN, true);
        assertTrue(r.isComplete);

        // 2. go thru whole treemap (slowCompletor) and check it's actually the best suggestion
        final List<Result<Long>> matches = new ArrayList<>();
        // TODO: could be faster... but it's slowCompletor for a reason
        for (Map.Entry<String, Long> entry : slowCompletor.entrySet()) {
            final String word = entry.getKey();
            if (!word.startsWith(prefix)) {
                continue;
            }
            // Expected result: the remaining suffix, with the prefix's output subtracted.
            final BytesRef suffix = new BytesRef(word.substring(prefix.length()));
            matches.add(new Result<>(Util.toIntsRef(suffix, new IntsRefBuilder()),
                                     entry.getValue() - prefixOutput));
        }
        assertTrue(!matches.isEmpty());

        // Order the brute-force matches and keep only the best topN of them.
        Collections.sort(matches, new TieBreakByInputComparator<>(minLongComparator));
        if (matches.size() > topN) {
            matches.subList(topN, matches.size()).clear();
        }

        assertEquals(matches.size(), r.topN.size());
        for (int hit = 0; hit < r.topN.size(); hit++) {
            final Result<Long> expected = matches.get(hit);
            assertEquals(expected.input, r.topN.get(hit).input);
            assertEquals(expected.output, r.topN.get(hit).output);
        }
    }
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) ArrayList(java.util.ArrayList) TestUtil(org.apache.lucene.util.TestUtil) FSTTester.simpleRandomString(org.apache.lucene.util.fst.FSTTester.simpleRandomString) FSTTester.getRandomString(org.apache.lucene.util.fst.FSTTester.getRandomString) Result(org.apache.lucene.util.fst.Util.Result) Random(java.util.Random) TreeSet(java.util.TreeSet) BytesRef(org.apache.lucene.util.BytesRef) TreeMap(java.util.TreeMap) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) BytesReader(org.apache.lucene.util.fst.FST.BytesReader) Map(java.util.Map) TreeMap(java.util.TreeMap)

Example 13 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class TestFSTs method testShortestPaths.

/**
 * Builds a three-entry FST ("aab" -> 22, "aac" -> 7, "ax" -> 17) and verifies that
 * {@code Util.shortestPaths} with topN=3 returns a complete result listing all three inputs
 * in the order aac (7), ax (17), aab (22).
 */
public void testShortestPaths() throws Exception {
    final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
    final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
    final IntsRefBuilder scratch = new IntsRefBuilder();

    // Inputs are added in the order aab, aac, ax, paired with their weights.
    final String[] inputs = {"aab", "aac", "ax"};
    final long[] weights = {22L, 7L, 17L};
    for (int i = 0; i < inputs.length; i++) {
        builder.add(Util.toIntsRef(new BytesRef(inputs[i]), scratch), weights[i]);
    }
    final FST<Long> fst = builder.finish();

    // Debugging aid: dump the FST with
    //   Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"));
    //   Util.toDot(fst, w, false, false);
    //   w.close();

    final FST.Arc<Long> firstArc = fst.getFirstArc(new FST.Arc<Long>());
    Util.TopResults<Long> res =
        Util.shortestPaths(fst, firstArc, outputs.getNoOutput(), minLongComparator, 3, true);
    assertTrue(res.isComplete);
    assertEquals(3, res.topN.size());

    // Expected hits, in the order the test requires them.
    final String[] expectedInputs = {"aac", "ax", "aab"};
    final long[] expectedOutputs = {7L, 17L, 22L};
    for (int hit = 0; hit < expectedInputs.length; hit++) {
        assertEquals(Util.toIntsRef(new BytesRef(expectedInputs[hit]), scratch), res.topN.get(hit).input);
        assertEquals(expectedOutputs[hit], res.topN.get(hit).output.longValue());
    }
}
Also used : Arc(org.apache.lucene.util.fst.FST.Arc) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) TestUtil(org.apache.lucene.util.TestUtil) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) BytesRef(org.apache.lucene.util.BytesRef)

Example 14 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class FiniteStringsIteratorTest method testFiniteStringsEatsStack.

/**
 * Unions the automata of two random 50000-char strings and checks that
 * {@code FiniteStringsIterator} yields exactly two strings, each matching the UTF-32 form of
 * one of the inputs.
 */
public void testFiniteStringsEatsStack() {
    // One buffer is reused to generate both random strings, one after the other.
    final char[] buffer = new char[50000];
    final String[] bigStrings = new String[2];
    for (int i = 0; i < bigStrings.length; i++) {
        TestUtil.randomFixedLengthUnicodeString(random(), buffer, 0, buffer.length);
        bigStrings[i] = new String(buffer);
    }

    Automaton union =
        Operations.union(Automata.makeString(bigStrings[0]), Automata.makeString(bigStrings[1]));
    List<IntsRef> actual = getFiniteStrings(new FiniteStringsIterator(union));
    assertEquals(2, actual.size());

    // Both inputs must be present among the enumerated strings.
    IntsRefBuilder scratch = new IntsRefBuilder();
    for (String expected : bigStrings) {
        Util.toUTF32(expected.toCharArray(), 0, expected.length(), scratch);
        assertTrue(actual.contains(scratch.get()));
    }
}
Also used : IntsRef(org.apache.lucene.util.IntsRef) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder)

Example 15 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class FiniteStringsIteratorTest method testShortAccept.

/**
 * Minimizes the union of the automata for "x" and "xy" and checks that
 * {@code FiniteStringsIterator} yields exactly those two strings.
 */
public void testShortAccept() {
    Automaton union = Operations.union(Automata.makeString("x"), Automata.makeString("xy"));
    Automaton minimized = MinimizationOperations.minimize(union, DEFAULT_MAX_DETERMINIZED_STATES);

    List<IntsRef> actual = getFiniteStrings(new FiniteStringsIterator(minimized));
    assertEquals(2, actual.size());

    // Each expected string is converted with its own fresh builder.
    for (String expected : new String[] {"x", "xy"}) {
        IntsRefBuilder expectedInts = new IntsRefBuilder();
        Util.toIntsRef(new BytesRef(expected), expectedInts);
        assertTrue(actual.contains(expectedInts.get()));
    }
}
Also used : IntsRef(org.apache.lucene.util.IntsRef) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

IntsRefBuilder (org.apache.lucene.util.IntsRefBuilder) 55, BytesRef (org.apache.lucene.util.BytesRef) 32, BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder) 25, IntsRef (org.apache.lucene.util.IntsRef) 19, ArrayList (java.util.ArrayList) 10, HashSet (java.util.HashSet) 10, Builder (org.apache.lucene.util.fst.Builder) 10, Arc (org.apache.lucene.util.fst.FST.Arc) 9, BytesReader (org.apache.lucene.util.fst.FST.BytesReader) 8, Map (java.util.Map) 7, HashMap (java.util.HashMap) 5, ByteArrayDataInput (org.apache.lucene.store.ByteArrayDataInput) 5, CharsRef (org.apache.lucene.util.CharsRef) 5, TestUtil (org.apache.lucene.util.TestUtil) 5, FSTTester.getRandomString (org.apache.lucene.util.fst.FSTTester.getRandomString) 5, FSTTester.simpleRandomString (org.apache.lucene.util.fst.FSTTester.simpleRandomString) 5, TreeMap (java.util.TreeMap) 4, CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder) 4, Pair (org.apache.lucene.util.fst.PairOutputs.Pair) 4, IOException (java.io.IOException) 3