Search in sources :

Example 6 with Arc

use of org.apache.lucene.util.fst.FST.Arc in project lucene-solr by apache.

the class MemoryDocValuesProducer method getSortedSet.

/**
 * Returns the sorted-set doc values for {@code field}.
 *
 * <p>Fields whose entry is flagged as a singleton are served by wrapping
 * {@link #getSorted(FieldInfo)}. Fields whose FST entry has zero ordinals get the
 * shared empty instance. Otherwise the field's FST is looked up in
 * {@code fstInstances} (loading it from a clone of {@code data} on a miss) while
 * holding this producer's monitor; the loaded FST is only cached, and its RAM
 * usage only accounted for, when {@code merging} is false.
 *
 * @param field the field whose values are requested
 * @return a view over the per-document ordinals and the ordinal-to-term FST
 * @throws IOException if reading the FST or the per-document ordinal bytes fails
 */
@Override
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
    SortedSetEntry sortedSetEntry = sortedSets.get(field.name);
    if (sortedSetEntry.singleton) {
        return DocValues.singleton(getSorted(field));
    }
    final FSTEntry entry = fsts.get(field.name);
    if (entry.numOrds == 0) {
        // empty FST!
        return DocValues.emptySortedSet();
    }
    FST<Long> instance;
    synchronized (this) {
        instance = fstInstances.get(field.name);
        if (instance == null) {
            IndexInput data = this.data.clone();
            data.seek(entry.offset);
            instance = new FST<>(data, PositiveIntOutputs.getSingleton());
            if (!merging) {
                ramBytesUsed.addAndGet(instance.ramBytesUsed());
                fstInstances.put(field.name, instance);
            }
        }
    }
    final LegacyBinaryDocValues docToOrds = getLegacyBinary(field);
    final FST<Long> fst = instance;
    // per-thread resources
    final BytesReader in = fst.getBytesReader();
    final Arc<Long> firstArc = new Arc<>();
    final Arc<Long> scratchArc = new Arc<>();
    final IntsRefBuilder scratchInts = new IntsRefBuilder();
    final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<>(fst);
    final ByteArrayDataInput input = new ByteArrayDataInput();
    return new LegacySortedSetDocValuesWrapper(new LegacySortedSetDocValues() {

        // Reused by lookupOrd to hold the bytes of the term it returns.
        final BytesRefBuilder term = new BytesRefBuilder();

        // Running ordinal for the current document; reset by setDocument.
        long currentOrd;

        /** Adds the next VLong read from the document's bytes to the running ordinal. */
        @Override
        public long nextOrd() {
            if (input.eof()) {
                return NO_MORE_ORDS;
            } else {
                currentOrd += input.readVLong();
                return currentOrd;
            }
        }

        /** Positions the reader on the ordinal bytes of {@code docID} and restarts the running ordinal. */
        @Override
        public void setDocument(int docID) {
            final BytesRef ref = docToOrds.get(docID);
            input.reset(ref.bytes, ref.offset, ref.length);
            currentOrd = 0;
        }

        /** Resolves {@code ord} to its term by walking the FST from its first arc. */
        @Override
        public BytesRef lookupOrd(long ord) {
            try {
                in.setPosition(0);
                fst.getFirstArc(firstArc);
                IntsRef output = Util.getByOutput(fst, ord, in, firstArc, scratchArc, scratchInts);
                return Util.toBytesRef(output, term);
            } catch (IOException bogus) {
                throw new RuntimeException(bogus);
            }
        }

        /**
         * Looks up {@code key} through {@code seekCeil}. Returns the FST output of an
         * exact match; otherwise returns {@code -output - 1} for the entry seekCeil
         * landed on, or {@code -getValueCount() - 1} when seekCeil found nothing.
         */
        @Override
        public long lookupTerm(BytesRef key) {
            try {
                InputOutput<Long> o = fstEnum.seekCeil(key);
                if (o == null) {
                    return -getValueCount() - 1;
                } else if (o.input.equals(key)) {
                    // Ordinals are longs in this API; do not narrow through int.
                    return o.output;
                } else {
                    return -o.output - 1;
                }
            } catch (IOException bogus) {
                throw new RuntimeException(bogus);
            }
        }

        @Override
        public long getValueCount() {
            return entry.numOrds;
        }

        @Override
        public TermsEnum termsEnum() {
            return new FSTTermsEnum(fst);
        }
    }, maxDoc);
}
Also used : ChecksumIndexInput(org.apache.lucene.store.ChecksumIndexInput) IndexInput(org.apache.lucene.store.IndexInput) IntsRef(org.apache.lucene.util.IntsRef) BytesRef(org.apache.lucene.util.BytesRef) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) InputOutput(org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput) IOException(java.io.IOException) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) BytesRefFSTEnum(org.apache.lucene.util.fst.BytesRefFSTEnum) ByteArrayDataInput(org.apache.lucene.store.ByteArrayDataInput) BytesReader(org.apache.lucene.util.fst.FST.BytesReader) Arc(org.apache.lucene.util.fst.FST.Arc) AtomicLong(java.util.concurrent.atomic.AtomicLong)

Example 7 with Arc

use of org.apache.lucene.util.fst.FST.Arc in project lucene-solr by apache.

the class Util method toDot.

/**
   * Dumps an {@link FST} to a GraphViz's <code>dot</code> language description
   * for visualization. Example of use:
   * 
   * <pre class="prettyprint">
   * PrintWriter pw = new PrintWriter(&quot;out.dot&quot;);
   * Util.toDot(fst, pw, true, true);
   * pw.close();
   * </pre>
   * 
   * and then, from command line:
   * 
   * <pre>
   * dot -Tpng -o out.png out.dot
   * </pre>
   * 
   * <p>
   * Note: larger FSTs (a few thousand nodes) won't even
   * render, don't bother.  If the FST is &gt; 2.1 GB in size
   * then this method will throw strange exceptions.
   * 
   * <p>
   * The graph is written level by level, starting from the FST's first arc.
   * Each state is emitted the first time an arc pointing at it is encountered;
   * every arc is emitted as an edge from its source node address to its target.
   * 
   * @param fst
   *          The FST to dump.
   * 
   * @param out
   *          Receives the <code>dot</code> text. It is flushed before this
   *          method returns but is not closed.
   * 
   * @param sameRank
   *          If <code>true</code>, the resulting <code>dot</code> file will try
   *          to order states in layers of breadth-first traversal. This may
   *          mess up arcs, but makes the output FST's structure a bit clearer.
   * 
   * @param labelStates
   *          If <code>true</code> states will have labels equal to their offsets in their
   *          binary format. Expands the graph considerably. 
   * 
   * @throws IOException
   *          If writing to <code>out</code> or reading arcs from the FST fails.
   * 
   * @see <a href="http://www.graphviz.org/">graphviz project</a>
   */
public static <T> void toDot(FST<T> fst, Writer out, boolean sameRank, boolean labelStates) throws IOException {
    final String expandedNodeColor = "blue";
    // This is the start arc in the automaton (from the epsilon state to the first state
    // with outgoing transitions).
    final FST.Arc<T> startArc = fst.getFirstArc(new FST.Arc<T>());
    // A queue of transitions to consider for the next level.
    final List<FST.Arc<T>> thisLevelQueue = new ArrayList<>();
    // A queue of transitions to consider when processing the next level.
    final List<FST.Arc<T>> nextLevelQueue = new ArrayList<>();
    nextLevelQueue.add(startArc);
    //System.out.println("toDot: startArc: " + startArc);
    // A list of states on the same level (for ranking).
    final List<Integer> sameLevelStates = new ArrayList<>();
    // A bitset of already seen states (target offset).
    // Targets are narrowed from long to int to index the bitset; see the size
    // note in the method documentation.
    final BitSet seen = new BitSet();
    seen.set((int) startArc.target);
    // Shape for states.
    final String stateShape = "circle";
    final String finalStateShape = "doublecircle";
    // Emit DOT prologue.
    out.write("digraph FST {\n");
    out.write("  rankdir = LR; splines=true; concentrate=true; ordering=out; ranksep=2.5; \n");
    if (!labelStates) {
        out.write("  node [shape=circle, width=.2, height=.2, style=filled]\n");
    }
    emitDotState(out, "initial", "point", "white", "");
    final T NO_OUTPUT = fst.outputs.getNoOutput();
    final BytesReader r = fst.getBytesReader();
    // final FST.Arc<T> scratchArc = new FST.Arc<>();
    // Emit the state the start arc points at. It is the only state that can be
    // drawn with the final-state shape; states found during the traversal below
    // always use the plain state shape.
    {
        final String stateColor;
        if (fst.isExpandedTarget(startArc, r)) {
            stateColor = expandedNodeColor;
        } else {
            stateColor = null;
        }
        final boolean isFinal;
        final T finalOutput;
        if (startArc.isFinal()) {
            isFinal = true;
            finalOutput = startArc.nextFinalOutput == NO_OUTPUT ? null : startArc.nextFinalOutput;
        } else {
            isFinal = false;
            finalOutput = null;
        }
        emitDotState(out, Long.toString(startArc.target), isFinal ? finalStateShape : stateShape, stateColor, finalOutput == null ? "" : fst.outputs.outputToString(finalOutput));
    }
    out.write("  initial -> " + startArc.target + "\n");
    int level = 0;
    // Level-by-level traversal: each pass drains the arcs queued by the previous
    // pass and queues the arcs leading to newly discovered states.
    while (!nextLevelQueue.isEmpty()) {
        // we could double buffer here, but it doesn't matter probably.
        //System.out.println("next level=" + level);
        thisLevelQueue.addAll(nextLevelQueue);
        nextLevelQueue.clear();
        level++;
        out.write("\n  // Transitions and states at level: " + level + "\n");
        while (!thisLevelQueue.isEmpty()) {
            // Arcs are taken from the tail of the list.
            final FST.Arc<T> arc = thisLevelQueue.remove(thisLevelQueue.size() - 1);
            //System.out.println("  pop: " + arc);
            if (FST.targetHasArcs(arc)) {
                // scan all target arcs
                //System.out.println("  readFirstTarget...");
                // Capture the source node address first: arc is reused below as the
                // cursor over that node's outgoing arcs, so arc.target changes.
                final long node = arc.target;
                fst.readFirstRealTargetArc(arc.target, arc, r);
                while (true) {
                    // Emit the unseen state and add it to the queue for the next level.
                    if (arc.target >= 0 && !seen.get((int) arc.target)) {
                        /*
              boolean isFinal = false;
              T finalOutput = null;
              fst.readFirstTargetArc(arc, scratchArc);
              if (scratchArc.isFinal() && fst.targetHasArcs(scratchArc)) {
                // target is final
                isFinal = true;
                finalOutput = scratchArc.output == NO_OUTPUT ? null : scratchArc.output;
                System.out.println("dot hit final label=" + (char) scratchArc.label);
              }
              */
                        final String stateColor;
                        if (fst.isExpandedTarget(arc, r)) {
                            stateColor = expandedNodeColor;
                        } else {
                            stateColor = null;
                        }
                        final String finalOutput;
                        if (arc.nextFinalOutput != null && arc.nextFinalOutput != NO_OUTPUT) {
                            finalOutput = fst.outputs.outputToString(arc.nextFinalOutput);
                        } else {
                            finalOutput = "";
                        }
                        emitDotState(out, Long.toString(arc.target), stateShape, stateColor, finalOutput);
                        // To see the node address, use this instead:
                        //emitDotState(out, Integer.toString(arc.target), stateShape, stateColor, String.valueOf(arc.target));
                        seen.set((int) arc.target);
                        // Queue a copy, because arc itself keeps being overwritten by
                        // the iteration over this node's arcs.
                        nextLevelQueue.add(new FST.Arc<T>().copyFrom(arc));
                        sameLevelStates.add((int) arc.target);
                    }
                    // Edge label suffix: "/<output>" when the arc carries an output.
                    String outs;
                    if (arc.output != NO_OUTPUT) {
                        outs = "/" + fst.outputs.outputToString(arc.output);
                    } else {
                        outs = "";
                    }
                    if (!FST.targetHasArcs(arc) && arc.isFinal() && arc.nextFinalOutput != NO_OUTPUT) {
                        // Tricky special case: sometimes, due to
                        // pruning, the builder can [sillily] produce
                        // an FST with an arc into the final end state
                        // (-1) but also with a next final output; in
                        // this case we pull that output up onto this
                        // arc
                        outs = outs + "/[" + fst.outputs.outputToString(arc.nextFinalOutput) + "]";
                    }
                    // Arcs carrying the BIT_TARGET_NEXT flag are drawn in red, all others in black.
                    final String arcColor;
                    if (arc.flag(FST.BIT_TARGET_NEXT)) {
                        arcColor = "red";
                    } else {
                        arcColor = "black";
                    }
                    assert arc.label != FST.END_LABEL;
                    // Final arcs are additionally drawn bold.
                    out.write("  " + node + " -> " + arc.target + " [label=\"" + printableLabel(arc.label) + outs + "\"" + (arc.isFinal() ? " style=\"bold\"" : "") + " color=\"" + arcColor + "\"]\n");
                    // Break the loop if we're on the last arc of this state.
                    if (arc.isLast()) {
                        //System.out.println("    break");
                        break;
                    }
                    fst.readNextRealArc(arc, r);
                }
            }
        }
        // Emit state ranking information.
        if (sameRank && sameLevelStates.size() > 1) {
            out.write("  {rank=same; ");
            for (int state : sameLevelStates) {
                out.write(state + "; ");
            }
            out.write(" }\n");
        }
        sameLevelStates.clear();
    }
    // Emit terminating state (always there anyway).
    out.write("  -1 [style=filled, color=black, shape=doublecircle, label=\"\"]\n\n");
    out.write("  {rank=sink; -1 }\n");
    out.write("}\n");
    out.flush();
}
Also used : ArrayList(java.util.ArrayList) BitSet(java.util.BitSet) BytesReader(org.apache.lucene.util.fst.FST.BytesReader) Arc(org.apache.lucene.util.fst.FST.Arc)

Example 8 with Arc

use of org.apache.lucene.util.fst.FST.Arc in project lucene-solr by apache.

the class MemoryDocValuesProducer method getSortedNonIterator.

/**
 * Builds the random-access (non-iterator) sorted doc values for {@code field}.
 *
 * <p>A field whose FST entry has zero ordinals gets the shared empty instance.
 * Otherwise the field's FST is taken from {@code fstInstances}, or loaded from a
 * clone of {@code data} on a miss, while holding this producer's monitor. A freshly
 * loaded FST is cached and counted in {@code ramBytesUsed} only when
 * {@code merging} is false.
 *
 * @param field the field whose values are requested
 * @return doc values that map documents to ordinals and ordinals to terms via the FST
 * @throws IOException if loading the FST or the per-document ordinals fails
 */
private LegacySortedDocValues getSortedNonIterator(FieldInfo field) throws IOException {
    final FSTEntry fstEntry = fsts.get(field.name);
    if (fstEntry.numOrds == 0) {
        return DocValues.emptyLegacySorted();
    }

    FST<Long> loadedFst;
    synchronized (this) {
        loadedFst = fstInstances.get(field.name);
        if (loadedFst == null) {
            // Read from a private clone so the shared input's position is untouched.
            IndexInput fstInput = this.data.clone();
            fstInput.seek(fstEntry.offset);
            loadedFst = new FST<>(fstInput, PositiveIntOutputs.getSingleton());
            if (!merging) {
                ramBytesUsed.addAndGet(loadedFst.ramBytesUsed());
                fstInstances.put(field.name, loadedFst);
            }
        }
    }

    final LegacyNumericDocValues ordsByDoc = getNumericNonIterator(field);
    final FST<Long> ordFst = loadedFst;

    // Scratch state owned by the returned instance and reused across lookups.
    final BytesReader fstReader = ordFst.getBytesReader();
    final Arc<Long> rootArc = new Arc<>();
    final Arc<Long> workArc = new Arc<>();
    final IntsRefBuilder intsScratch = new IntsRefBuilder();
    final BytesRefFSTEnum<Long> ordEnum = new BytesRefFSTEnum<>(ordFst);

    return new LegacySortedDocValues() {

        // Holds the bytes of the term most recently returned by lookupOrd.
        final BytesRefBuilder termScratch = new BytesRefBuilder();

        /** Returns the ordinal recorded for {@code docID}, narrowed to int. */
        @Override
        public int getOrd(int docID) {
            final long rawOrd = ordsByDoc.get(docID);
            return (int) rawOrd;
        }

        /** Resolves {@code ord} to its term by walking the FST from its first arc. */
        @Override
        public BytesRef lookupOrd(int ord) {
            try {
                fstReader.setPosition(0);
                ordFst.getFirstArc(rootArc);
                return Util.toBytesRef(
                    Util.getByOutput(ordFst, ord, fstReader, rootArc, workArc, intsScratch),
                    termScratch);
            } catch (IOException bogus) {
                throw new RuntimeException(bogus);
            }
        }

        /**
         * Looks up {@code key} through {@code seekCeil}. Returns the FST output of an
         * exact match; otherwise returns {@code -output - 1} for the entry seekCeil
         * landed on, or {@code -getValueCount() - 1} when seekCeil found nothing.
         */
        @Override
        public int lookupTerm(BytesRef key) {
            final InputOutput<Long> ceiling;
            try {
                ceiling = ordEnum.seekCeil(key);
            } catch (IOException bogus) {
                throw new RuntimeException(bogus);
            }
            if (ceiling == null) {
                return -getValueCount() - 1;
            }
            if (ceiling.input.equals(key)) {
                return ceiling.output.intValue();
            }
            return (int) -ceiling.output - 1;
        }

        @Override
        public int getValueCount() {
            return (int) fstEntry.numOrds;
        }

        @Override
        public TermsEnum termsEnum() {
            return new FSTTermsEnum(ordFst);
        }
    };
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) IOException(java.io.IOException) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) BytesRefFSTEnum(org.apache.lucene.util.fst.BytesRefFSTEnum) BytesReader(org.apache.lucene.util.fst.FST.BytesReader) Arc(org.apache.lucene.util.fst.FST.Arc) AtomicLong(java.util.concurrent.atomic.AtomicLong) ChecksumIndexInput(org.apache.lucene.store.ChecksumIndexInput) IndexInput(org.apache.lucene.store.IndexInput) IntsRef(org.apache.lucene.util.IntsRef) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

Arc (org.apache.lucene.util.fst.FST.Arc)8 BytesReader (org.apache.lucene.util.fst.FST.BytesReader)7 BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder)6 IntsRefBuilder (org.apache.lucene.util.IntsRefBuilder)6 ArrayList (java.util.ArrayList)5 IOException (java.io.IOException)4 BytesRef (org.apache.lucene.util.BytesRef)4 IntsRef (org.apache.lucene.util.IntsRef)3 HashSet (java.util.HashSet)2 AtomicLong (java.util.concurrent.atomic.AtomicLong)2 ChecksumIndexInput (org.apache.lucene.store.ChecksumIndexInput)2 IndexInput (org.apache.lucene.store.IndexInput)2 CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder)2 BytesRefFSTEnum (org.apache.lucene.util.fst.BytesRefFSTEnum)2 FSTTester.getRandomString (org.apache.lucene.util.fst.FSTTester.getRandomString)2 FSTTester.simpleRandomString (org.apache.lucene.util.fst.FSTTester.simpleRandomString)2 BitSet (java.util.BitSet)1 TokenStream (org.apache.lucene.analysis.TokenStream)1 OffsetAttribute (org.apache.lucene.analysis.tokenattributes.OffsetAttribute)1 PositionIncrementAttribute (org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute)1