Example 6 with Arc

the class MemoryDocValuesProducer method getSortedSet.

public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
    SortedSetEntry sortedSetEntry = sortedSets.get(;
    if (sortedSetEntry.singleton) {
        return DocValues.singleton(getSorted(field));
    final FSTEntry entry = fsts.get(;
    if (entry.numOrds == 0) {
        // empty FST!
        return DocValues.emptySortedSet();
    FST<Long> instance;
    synchronized (this) {
        instance = fstInstances.get(;
        if (instance == null) {
            IndexInput data =;
            instance = new FST<>(data, PositiveIntOutputs.getSingleton());
            if (!merging) {
                fstInstances.put(, instance);
    final LegacyBinaryDocValues docToOrds = getLegacyBinary(field);
    final FST<Long> fst = instance;
    // per-thread resources
    final BytesReader in = fst.getBytesReader();
    final Arc<Long> firstArc = new Arc<>();
    final Arc<Long> scratchArc = new Arc<>();
    final IntsRefBuilder scratchInts = new IntsRefBuilder();
    final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<>(fst);
    final ByteArrayDataInput input = new ByteArrayDataInput();
    return new LegacySortedSetDocValuesWrapper(new LegacySortedSetDocValues() {

        final BytesRefBuilder term = new BytesRefBuilder();

        BytesRef ref;

        long currentOrd;

        public long nextOrd() {
            if (input.eof()) {
                return NO_MORE_ORDS;
            } else {
                currentOrd += input.readVLong();
                return currentOrd;

        public void setDocument(int docID) {
            ref = docToOrds.get(docID);
            input.reset(ref.bytes, ref.offset, ref.length);
            currentOrd = 0;

        public BytesRef lookupOrd(long ord) {
            try {
                IntsRef output = Util.getByOutput(fst, ord, in, firstArc, scratchArc, scratchInts);
                return Util.toBytesRef(output, term);
            } catch (IOException bogus) {
                throw new RuntimeException(bogus);

        public long lookupTerm(BytesRef key) {
            try {
                InputOutput<Long> o = fstEnum.seekCeil(key);
                if (o == null) {
                    return -getValueCount() - 1;
                } else if (o.input.equals(key)) {
                    return o.output.intValue();
                } else {
                    return -o.output - 1;
            } catch (IOException bogus) {
                throw new RuntimeException(bogus);

        public long getValueCount() {
            return entry.numOrds;

        public TermsEnum termsEnum() {
            return new FSTTermsEnum(fst);
    }, maxDoc);
Also used : ChecksumIndexInput(org.apache.lucene.store.ChecksumIndexInput) IndexInput(org.apache.lucene.store.IndexInput) IntsRef(org.apache.lucene.util.IntsRef) BytesRef(org.apache.lucene.util.BytesRef) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) InputOutput(org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput) IOException(java.io.IOException) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) BytesRefFSTEnum(org.apache.lucene.util.fst.BytesRefFSTEnum) ByteArrayDataInput(org.apache.lucene.store.ByteArrayDataInput) BytesReader(org.apache.lucene.util.fst.FST.BytesReader) Arc(org.apache.lucene.util.fst.FST.Arc) AtomicLong(java.util.concurrent.atomic.AtomicLong)

Example 7 with Arc

the class Util method toDot.

  /**
   * Dumps an {@link FST} to a GraphViz <code>dot</code> language description
   * for visualization. Example of use:
   * <pre class="prettyprint">
   * PrintWriter pw = new PrintWriter(&quot;out.dot&quot;);
   * Util.toDot(fst, pw, true, true);
   * pw.close();
   * </pre>
   * and then, from command line:
   * <pre>
   * dot -Tpng -o out.png out.dot
   * </pre>
   * <p>
   * Note: larger FSTs (a few thousand nodes) won't even
   * render, don't bother.  If the FST is &gt; 2.1 GB in size
   * then this method will throw strange exceptions.
   * @param sameRank
   *          If <code>true</code>, the resulting <code>dot</code> file will try
   *          to order states in layers of breadth-first traversal. This may
   *          mess up arcs, but makes the output FST's structure a bit clearer.
   * @param labelStates
   *          If <code>true</code> states will have labels equal to their offsets in their
   *          binary format. Expands the graph considerably.
   * @see <a href="https://graphviz.org/">graphviz project</a>
   */
public static <T> void toDot(FST<T> fst, Writer out, boolean sameRank, boolean labelStates) throws IOException {
    final String expandedNodeColor = "blue";
    // This is the start arc in the automaton (from the epsilon state to the first state 
    // with outgoing transitions.
    final FST.Arc<T> startArc = fst.getFirstArc(new FST.Arc<T>());
    // A queue of transitions to consider for the next level.
    final List<FST.Arc<T>> thisLevelQueue = new ArrayList<>();
    // A queue of transitions to consider when processing the next level.
    final List<FST.Arc<T>> nextLevelQueue = new ArrayList<>();
    //System.out.println("toDot: startArc: " + startArc);
    // A list of states on the same level (for ranking).
    final List<Integer> sameLevelStates = new ArrayList<>();
    // A bitset of already seen states (target offset).
    final BitSet seen = new BitSet();
    // Shape for states.
    final String stateShape = "circle";
    final String finalStateShape = "doublecircle";
    // Emit DOT prologue.
    out.write("digraph FST {\n");
    out.write("  rankdir = LR; splines=true; concentrate=true; ordering=out; ranksep=2.5; \n");
    if (!labelStates) {
        out.write("  node [shape=circle, width=.2, height=.2, style=filled]\n");
    emitDotState(out, "initial", "point", "white", "");
    final T NO_OUTPUT = fst.outputs.getNoOutput();
    final BytesReader r = fst.getBytesReader();
    // final FST.Arc<T> scratchArc = new FST.Arc<>();
        final String stateColor;
        if (fst.isExpandedTarget(startArc, r)) {
            stateColor = expandedNodeColor;
        } else {
            stateColor = null;
        final boolean isFinal;
        final T finalOutput;
        if (startArc.isFinal()) {
            isFinal = true;
            finalOutput = startArc.nextFinalOutput == NO_OUTPUT ? null : startArc.nextFinalOutput;
        } else {
            isFinal = false;
            finalOutput = null;
        emitDotState(out, Long.toString(, isFinal ? finalStateShape : stateShape, stateColor, finalOutput == null ? "" : fst.outputs.outputToString(finalOutput));
    out.write("  initial -> " + + "\n");
    int level = 0;
    while (!nextLevelQueue.isEmpty()) {
        // we could double buffer here, but it doesn't matter probably.
        //System.out.println("next level=" + level);
        out.write("\n  // Transitions and states at level: " + level + "\n");
        while (!thisLevelQueue.isEmpty()) {
            final FST.Arc<T> arc = thisLevelQueue.remove(thisLevelQueue.size() - 1);
            //System.out.println("  pop: " + arc);
            if (FST.targetHasArcs(arc)) {
                // scan all target arcs
                //System.out.println("  readFirstTarget...");
                final long node =;
                fst.readFirstRealTargetArc(, arc, r);
                while (true) {
                    // Emit the unseen state and add it to the queue for the next level.
                    if ( >= 0 && !seen.get((int) {
              boolean isFinal = false;
              T finalOutput = null;
              fst.readFirstTargetArc(arc, scratchArc);
              if (scratchArc.isFinal() && fst.targetHasArcs(scratchArc)) {
                // target is final
                isFinal = true;
                finalOutput = scratchArc.output == NO_OUTPUT ? null : scratchArc.output;
                System.out.println("dot hit final label=" + (char) scratchArc.label);
                        final String stateColor;
                        if (fst.isExpandedTarget(arc, r)) {
                            stateColor = expandedNodeColor;
                        } else {
                            stateColor = null;
                        final String finalOutput;
                        if (arc.nextFinalOutput != null && arc.nextFinalOutput != NO_OUTPUT) {
                            finalOutput = fst.outputs.outputToString(arc.nextFinalOutput);
                        } else {
                            finalOutput = "";
                        emitDotState(out, Long.toString(, stateShape, stateColor, finalOutput);
                        // To see the node address, use this instead:
                        //emitDotState(out, Integer.toString(, stateShape, stateColor, String.valueOf(;
                        nextLevelQueue.add(new FST.Arc<T>().copyFrom(arc));
                    String outs;
                    if (arc.output != NO_OUTPUT) {
                        outs = "/" + fst.outputs.outputToString(arc.output);
                    } else {
                        outs = "";
                    if (!FST.targetHasArcs(arc) && arc.isFinal() && arc.nextFinalOutput != NO_OUTPUT) {
                        // Tricky special case: sometimes, due to
                        // pruning, the builder can [sillily] produce
                        // an FST with an arc into the final end state
                        // (-1) but also with a next final output; in
                        // this case we pull that output up onto this
                        // arc
                        outs = outs + "/[" + fst.outputs.outputToString(arc.nextFinalOutput) + "]";
                    final String arcColor;
                    if (arc.flag(FST.BIT_TARGET_NEXT)) {
                        arcColor = "red";
                    } else {
                        arcColor = "black";
                    assert arc.label != FST.END_LABEL;
                    out.write("  " + node + " -> " + + " [label=\"" + printableLabel(arc.label) + outs + "\"" + (arc.isFinal() ? " style=\"bold\"" : "") + " color=\"" + arcColor + "\"]\n");
                    // Break the loop if we're on the last arc of this state.
                    if (arc.isLast()) {
                        //System.out.println("    break");
                    fst.readNextRealArc(arc, r);
        // Emit state ranking information.
        if (sameRank && sameLevelStates.size() > 1) {
            out.write("  {rank=same; ");
            for (int state : sameLevelStates) {
                out.write(state + "; ");
            out.write(" }\n");
    // Emit terminating state (always there anyway).
    out.write("  -1 [style=filled, color=black, shape=doublecircle, label=\"\"]\n\n");
    out.write("  {rank=sink; -1 }\n");
Also used : ArrayList(java.util.ArrayList) BitSet(java.util.BitSet) BytesReader(org.apache.lucene.util.fst.FST.BytesReader) Arc(org.apache.lucene.util.fst.FST.Arc)

Example 8 with Arc

use of org.apache.lucene.util.fst.FST.Arc in project lucene-solr by apache.

the class MemoryDocValuesProducer method getSortedNonIterator.

private LegacySortedDocValues getSortedNonIterator(FieldInfo field) throws IOException {
    final FSTEntry entry = fsts.get(;
    if (entry.numOrds == 0) {
        return DocValues.emptyLegacySorted();
    FST<Long> instance;
    synchronized (this) {
        instance = fstInstances.get(;
        if (instance == null) {
            IndexInput data =;
            instance = new FST<>(data, PositiveIntOutputs.getSingleton());
            if (!merging) {
                fstInstances.put(, instance);
    final LegacyNumericDocValues docToOrd = getNumericNonIterator(field);
    final FST<Long> fst = instance;
    // per-thread resources
    final BytesReader in = fst.getBytesReader();
    final Arc<Long> firstArc = new Arc<>();
    final Arc<Long> scratchArc = new Arc<>();
    final IntsRefBuilder scratchInts = new IntsRefBuilder();
    final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<>(fst);
    return new LegacySortedDocValues() {

        final BytesRefBuilder term = new BytesRefBuilder();

        public int getOrd(int docID) {
            return (int) docToOrd.get(docID);

        public BytesRef lookupOrd(int ord) {
            try {
                IntsRef output = Util.getByOutput(fst, ord, in, firstArc, scratchArc, scratchInts);
                return Util.toBytesRef(output, term);
            } catch (IOException bogus) {
                throw new RuntimeException(bogus);

        public int lookupTerm(BytesRef key) {
            try {
                InputOutput<Long> o = fstEnum.seekCeil(key);
                if (o == null) {
                    return -getValueCount() - 1;
                } else if (o.input.equals(key)) {
                    return o.output.intValue();
                } else {
                    return (int) -o.output - 1;
            } catch (IOException bogus) {
                throw new RuntimeException(bogus);

        public int getValueCount() {
            return (int) entry.numOrds;

        public TermsEnum termsEnum() {
            return new FSTTermsEnum(fst);
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) IOException(java.io.IOException) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) BytesRefFSTEnum(org.apache.lucene.util.fst.BytesRefFSTEnum) BytesReader(org.apache.lucene.util.fst.FST.BytesReader) Arc(org.apache.lucene.util.fst.FST.Arc) AtomicLong(java.util.concurrent.atomic.AtomicLong) ChecksumIndexInput(org.apache.lucene.store.ChecksumIndexInput) IndexInput(org.apache.lucene.store.IndexInput) IntsRef(org.apache.lucene.util.IntsRef) BytesRef(org.apache.lucene.util.BytesRef)


