Search in sources :

Example 31 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class BooleanPerceptronClassifier method updateFST.

/**
 * Rebuilds the {@code fst} field from the supplied term weights.
 *
 * <p>Entries are visited in the iteration order of {@code weights}. Each key is copied into a
 * reusable byte buffer, converted to its int-sequence form, and added to a fresh builder
 * together with the weight narrowed to a {@code long} via {@link Double#longValue()}.
 *
 * @param weights map from term to weight
 * @throws IOException declared by this method; presumably raised by the underlying FST builder
 */
private void updateFST(SortedMap<String, Double> weights) throws IOException {
    final BytesRefBuilder termBytes = new BytesRefBuilder();
    final IntsRefBuilder termInts = new IntsRefBuilder();
    final PositiveIntOutputs longOutputs = PositiveIntOutputs.getSingleton();
    final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, longOutputs);
    for (Map.Entry<String, Double> weighted : weights.entrySet()) {
        final String term = weighted.getKey();
        termBytes.copyChars(term);
        // Both scratch buffers are reused across iterations; only their current contents matter.
        builder.add(Util.toIntsRef(termBytes.get(), termInts), weighted.getValue().longValue());
    }
    fst = builder.finish();
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) PositiveIntOutputs(org.apache.lucene.util.fst.PositiveIntOutputs) Builder(org.apache.lucene.util.fst.Builder) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) Map(java.util.Map) ConcurrentSkipListMap(java.util.concurrent.ConcurrentSkipListMap) SortedMap(java.util.SortedMap)

Example 32 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class TestAutomaton method toIntsRef.

/**
 * Converts a string into an {@link IntsRef} containing one int per code point, as reported by
 * {@link String#codePointAt(int)}.
 *
 * @param s the string to convert
 * @return the ints accumulated in a fresh {@link IntsRefBuilder}
 */
private static IntsRef toIntsRef(String s) {
    final IntsRefBuilder codePoints = new IntsRefBuilder();
    int offset = 0;
    while (offset < s.length()) {
        final int codePoint = s.codePointAt(offset);
        codePoints.append(codePoint);
        // Advance by one or two chars depending on whether this was a supplementary code point.
        offset += Character.charCount(codePoint);
    }
    return codePoints.get();
}
Also used : IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder)

Example 33 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class TestAutomaton method assertSame.

/**
 * Asserts that automaton {@code a} accepts exactly the given terms.
 *
 * <p>The check is performed several ways: the automaton must be finite and not total; every
 * term (decoded as a UTF-8 string) must be accepted by the determinized automaton; the set of
 * finite strings must equal the terms converted with {@code Util.toUTF32}; the language must
 * match the union of the terms; and the same finite-strings comparison is repeated after
 * converting the automaton with {@code UTF32ToUTF8}.
 *
 * <p>On any {@link AssertionError} the terms and the automaton (in dot format) are printed to
 * standard output before the error is rethrown.
 *
 * @param terms the terms the automaton is expected to accept
 * @param a the automaton under test
 */
private void assertSame(Collection<BytesRef> terms, Automaton a) {
    try {
        assertTrue(Operations.isFinite(a));
        assertFalse(Operations.isTotal(a));
        Automaton detA = Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);
        // Make sure all terms are accepted. The string form is what gets run against the
        // automaton, so the term is decoded once and no int-sequence conversion is needed here.
        for (BytesRef term : terms) {
            String termString = term.utf8ToString();
            assertTrue("failed to accept term=" + termString, Operations.run(detA, termString));
        }
        // Use getFiniteStrings:
        Set<IntsRef> expected = new HashSet<>();
        for (BytesRef term : terms) {
            IntsRefBuilder intsRef = new IntsRefBuilder();
            Util.toUTF32(term.utf8ToString(), intsRef);
            expected.add(intsRef.toIntsRef());
        }
        Set<IntsRef> actual = TestOperations.getFiniteStrings(a);
        if (expected.equals(actual) == false) {
            // Report both directions of the mismatch before failing.
            System.out.println("FAILED:");
            for (IntsRef term : expected) {
                if (actual.contains(term) == false) {
                    System.out.println("  term=" + term + " should be accepted but isn't");
                }
            }
            for (IntsRef term : actual) {
                if (expected.contains(term) == false) {
                    System.out.println("  term=" + term + " is accepted but should not be");
                }
            }
            throw new AssertionError("mismatch");
        }
        // Use sameLanguage:
        Automaton a2 = Operations.removeDeadStates(Operations.determinize(unionTerms(terms), Integer.MAX_VALUE));
        assertTrue(Operations.sameLanguage(a2, Operations.removeDeadStates(Operations.determinize(a, Integer.MAX_VALUE))));
        // Do same check, in UTF8 space
        Automaton utf8 = randomNoOp(new UTF32ToUTF8().convert(a));
        Set<IntsRef> expected2 = new HashSet<>();
        for (BytesRef term : terms) {
            IntsRefBuilder intsRef = new IntsRefBuilder();
            Util.toIntsRef(term, intsRef);
            expected2.add(intsRef.toIntsRef());
        }
        assertEquals(expected2, TestOperations.getFiniteStrings(utf8));
    } catch (AssertionError ae) {
        // Dump enough context to reproduce the failure, then propagate the original error.
        System.out.println("TEST: FAILED: not same");
        System.out.println("  terms (count=" + terms.size() + "):");
        for (BytesRef term : terms) {
            System.out.println("    " + term);
        }
        System.out.println("  automaton:");
        System.out.println(a.toDot());
        //a.writeDot("fail");
        throw ae;
    }
}
Also used : IntsRef(org.apache.lucene.util.IntsRef) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) BytesRef(org.apache.lucene.util.BytesRef) HashSet(java.util.HashSet)

Example 34 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class TestAutomaton method assertMatches.

/**
 * Asserts that the finite strings of the determinized form of {@code a} are exactly
 * {@code strings}.
 *
 * <p>Each expected string is converted with {@code Util.toUTF32} into its own fresh builder so
 * the resulting {@link IntsRef} instances do not share state.
 *
 * @param a the automaton under test
 * @param strings the complete set of strings the automaton should accept
 */
private void assertMatches(Automaton a, String... strings) {
    final Set<IntsRef> wanted = new HashSet<>();
    for (String expectedString : strings) {
        final IntsRef converted = Util.toUTF32(expectedString, new IntsRefBuilder());
        wanted.add(converted);
    }
    final Automaton deterministic = Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);
    final Set<IntsRef> accepted = TestOperations.getFiniteStrings(deterministic);
    assertEquals(wanted, accepted);
}
Also used : IntsRef(org.apache.lucene.util.IntsRef) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) HashSet(java.util.HashSet)

Example 35 with IntsRefBuilder

use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.

the class TestAutomaton method accepts.

/**
 * Runs automaton {@code a} against {@code b} after converting it with {@code Util.toIntsRef}.
 *
 * @param a the automaton to run
 * @param b the term to test
 * @return the result of {@code Operations.run} on the converted input
 */
private boolean accepts(Automaton a, BytesRef b) {
    final IntsRefBuilder converted = new IntsRefBuilder();
    Util.toIntsRef(b, converted);
    final IntsRef input = converted.toIntsRef();
    return Operations.run(a, input);
}
Also used : IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder)

Aggregations

IntsRefBuilder (org.apache.lucene.util.IntsRefBuilder)55 BytesRef (org.apache.lucene.util.BytesRef)32 BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder)25 IntsRef (org.apache.lucene.util.IntsRef)19 ArrayList (java.util.ArrayList)10 HashSet (java.util.HashSet)10 Builder (org.apache.lucene.util.fst.Builder)10 Arc (org.apache.lucene.util.fst.FST.Arc)9 BytesReader (org.apache.lucene.util.fst.FST.BytesReader)8 Map (java.util.Map)7 HashMap (java.util.HashMap)5 ByteArrayDataInput (org.apache.lucene.store.ByteArrayDataInput)5 CharsRef (org.apache.lucene.util.CharsRef)5 TestUtil (org.apache.lucene.util.TestUtil)5 FSTTester.getRandomString (org.apache.lucene.util.fst.FSTTester.getRandomString)5 FSTTester.simpleRandomString (org.apache.lucene.util.fst.FSTTester.simpleRandomString)5 TreeMap (java.util.TreeMap)4 CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder)4 Pair (org.apache.lucene.util.fst.PairOutputs.Pair)4 IOException (java.io.IOException)3