Search in sources:

Example 31 with IntsRef

use of org.apache.lucene.util.IntsRef in project lucene-solr by apache.

the class FuzzySuggesterTest method toIntsRef.

/**
 * Converts a string into an {@link IntsRef} that stores one Unicode code point per slot.
 *
 * @param s the string to convert; must not be {@code null}
 * @return a new {@code IntsRef} whose {@code length} equals the number of code points read
 */
private static IntsRef toIntsRef(String s) {
    // Capacity is the UTF-16 length: an upper bound, since a surrogate pair
    // occupies two chars but yields only one code point.
    IntsRef result = new IntsRef(s.length());
    final int charCount = s.length();
    int pos = 0;
    while (pos < charCount) {
        final int codePoint = Character.codePointAt(s, pos);
        result.ints[result.length++] = codePoint;
        // Step over one or two chars depending on whether this was a surrogate pair.
        pos += Character.charCount(codePoint);
    }
    return result;
}
Also used : IntsRef(org.apache.lucene.util.IntsRef)

Example 32 with IntsRef

use of org.apache.lucene.util.IntsRef in project lucene-solr by apache.

the class Dictionary method affixFST.

/**
 * Builds an FST from the given affix map. Each key is converted with
 * {@code Util.toUTF32} and used as the FST input; the integers in the associated
 * list become the {@link IntsRef} output for that input.
 *
 * @param affixes map from affix string to its list of integer entries; visited in the
 *     map's ascending key order
 * @return the result of {@code Builder.finish()} after all entries have been added
 * @throws IOException if adding to or finishing the builder fails
 */
private FST<IntsRef> affixFST(TreeMap<String, List<Integer>> affixes) throws IOException {
    IntSequenceOutputs fstOutputs = IntSequenceOutputs.getSingleton();
    Builder<IntsRef> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE4, fstOutputs);
    // Reused for every key so that only one input buffer is needed.
    IntsRefBuilder keyScratch = new IntsRefBuilder();
    for (Map.Entry<String, List<Integer>> affix : affixes.entrySet()) {
        Util.toUTF32(affix.getKey(), keyScratch);
        List<Integer> ids = affix.getValue();
        // Copy the boxed list into a fresh int-backed output sized exactly to the list.
        IntsRef packed = new IntsRef(ids.size());
        for (int id : ids) {
            packed.ints[packed.length++] = id;
        }
        fstBuilder.add(keyScratch.get(), packed);
    }
    return fstBuilder.finish();
}
Also used : Builder(org.apache.lucene.util.fst.Builder) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) IntSequenceOutputs(org.apache.lucene.util.fst.IntSequenceOutputs) List(java.util.List) ArrayList(java.util.ArrayList) IntsRef(org.apache.lucene.util.IntsRef) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) TreeMap(java.util.TreeMap)

Example 33 with IntsRef

use of org.apache.lucene.util.IntsRef in project lucene-solr by apache.

the class Dictionary method lookup.

/**
 * Walks {@code fst} along the code points of {@code word[offset .. offset + length)} and
 * returns the combined output of the arcs followed, including the final
 * {@code FST.END_LABEL} arc.
 *
 * @param fst the FST to search; may be {@code null}
 * @param word buffer holding the characters to look up
 * @param offset index of the first char to read
 * @param length number of chars to read
 * @return the accumulated output, or {@code null} if {@code fst} is {@code null}, a code
 *     point has no matching arc, or there is no {@code FST.END_LABEL} arc after the last
 *     code point
 * @throws RuntimeException wrapping any {@link IOException} raised while reading the FST
 */
IntsRef lookup(FST<IntsRef> fst, char[] word, int offset, int length) {
    if (fst == null) {
        return null;
    }
    final FST.BytesReader reader = fst.getBytesReader();
    // A single arc object is used as both the source and the target of every step.
    final FST.Arc<IntsRef> current = fst.getFirstArc(new FST.Arc<IntsRef>());
    // Compared by identity below to skip arcs that carry no output.
    final IntsRef noOutput = fst.outputs.getNoOutput();
    final int end = offset + length;
    IntsRef accumulated = noOutput;
    try {
        int pos = offset;
        while (pos < end) {
            final int codePoint = Character.codePointAt(word, pos, end);
            if (fst.findTargetArc(codePoint, current, current, reader) == null) {
                return null;
            }
            if (current.output != noOutput) {
                accumulated = fst.outputs.add(accumulated, current.output);
            }
            pos += Character.charCount(codePoint);
        }
        // All code points matched; an END_LABEL arc is still required for a hit.
        if (fst.findTargetArc(FST.END_LABEL, current, current, reader) == null) {
            return null;
        }
        return current.output != noOutput
            ? fst.outputs.add(accumulated, current.output)
            : accumulated;
    } catch (IOException bogus) {
        throw new RuntimeException(bogus);
    }
}
Also used : FST(org.apache.lucene.util.fst.FST) IntsRef(org.apache.lucene.util.IntsRef) IOException(java.io.IOException)

Example 34 with IntsRef

use of org.apache.lucene.util.IntsRef in project lucene-solr by apache.

the class TestDictionary method testCompressedBeforeSetDictionary.

/**
 * Loads a dictionary from {@code compressed-before-set.aff} and {@code compressed.dic} and
 * checks the suffix and prefix lookup sizes and the flags decoded for the word "olr".
 *
 * <p>The two streams and the directory are opened in a try-with-resources statement so
 * they are closed even when the {@code Dictionary} constructor throws or an assertion
 * fails; previously they were closed only on the success path.
 *
 * @throws Exception if loading the dictionary or closing a resource fails
 */
public void testCompressedBeforeSetDictionary() throws Exception {
    try (InputStream affixStream = getClass().getResourceAsStream("compressed-before-set.aff");
         InputStream dictStream = getClass().getResourceAsStream("compressed.dic");
         Directory tempDir = getDirectory()) {
        Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream);
        assertEquals(3, dictionary.lookupSuffix(new char[] { 'e' }, 0, 1).length);
        assertEquals(1, dictionary.lookupPrefix(new char[] { 's' }, 0, 1).length);
        IntsRef ordList = dictionary.lookupWord(new char[] { 'o', 'l', 'r' }, 0, 3);
        BytesRef ref = new BytesRef();
        // The first int of the word's entry is passed to flagLookup to fill ref.
        dictionary.flagLookup.get(ordList.ints[0], ref);
        char[] flags = Dictionary.decodeFlags(ref);
        assertEquals(1, flags.length);
    }
}
Also used : FilterInputStream(java.io.FilterInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) IntsRef(org.apache.lucene.util.IntsRef) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 35 with IntsRef

use of org.apache.lucene.util.IntsRef in project lucene-solr by apache.

the class TestDictionary method testCompressedDictionary.

/**
 * Loads a dictionary from {@code compressed.aff} and {@code compressed.dic} and checks the
 * suffix and prefix lookup sizes and the flags decoded for the word "olr".
 *
 * <p>The two streams and the directory are opened in a try-with-resources statement so
 * they are closed even when the {@code Dictionary} constructor throws or an assertion
 * fails; previously they were closed only on the success path.
 *
 * @throws Exception if loading the dictionary or closing a resource fails
 */
public void testCompressedDictionary() throws Exception {
    try (InputStream affixStream = getClass().getResourceAsStream("compressed.aff");
         InputStream dictStream = getClass().getResourceAsStream("compressed.dic");
         Directory tempDir = getDirectory()) {
        Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream);
        assertEquals(3, dictionary.lookupSuffix(new char[] { 'e' }, 0, 1).length);
        assertEquals(1, dictionary.lookupPrefix(new char[] { 's' }, 0, 1).length);
        IntsRef ordList = dictionary.lookupWord(new char[] { 'o', 'l', 'r' }, 0, 3);
        BytesRef ref = new BytesRef();
        // The first int of the word's entry is passed to flagLookup to fill ref.
        dictionary.flagLookup.get(ordList.ints[0], ref);
        char[] flags = Dictionary.decodeFlags(ref);
        assertEquals(1, flags.length);
    }
}
Also used : FilterInputStream(java.io.FilterInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) IntsRef(org.apache.lucene.util.IntsRef) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Aggregations

IntsRef (org.apache.lucene.util.IntsRef): 63; BytesRef (org.apache.lucene.util.BytesRef): 19; IntsRefBuilder (org.apache.lucene.util.IntsRefBuilder): 19; HashSet (java.util.HashSet): 16; ArrayList (java.util.ArrayList): 13; Automaton (org.apache.lucene.util.automaton.Automaton): 13; BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder): 12; TokenStreamToAutomaton (org.apache.lucene.analysis.TokenStreamToAutomaton): 9; IOException (java.io.IOException): 7; Directory (org.apache.lucene.store.Directory): 7; HashMap (java.util.HashMap): 5; FiniteStringsIterator (org.apache.lucene.util.automaton.FiniteStringsIterator): 5; BytesReader (org.apache.lucene.util.fst.FST.BytesReader): 5; Pair (org.apache.lucene.util.fst.PairOutputs.Pair): 5; ByteArrayInputStream (java.io.ByteArrayInputStream): 4; FilterInputStream (java.io.FilterInputStream): 4; InputStream (java.io.InputStream): 4; Map (java.util.Map): 4; Random (java.util.Random): 4; TokenStream (org.apache.lucene.analysis.TokenStream): 4