Search in sources :

Example 61 with BytesRefBuilder

use of org.apache.lucene.util.BytesRefBuilder in project lucene-solr by apache.

the class SimpleTextLiveDocsFormat method readLiveDocs.

/**
 * Reads the plain-text live-docs file for the segment's current deletion generation.
 *
 * <p>The file is expected to start with a {@code SIZE} line, followed by zero or more
 * {@code DOC} lines and a terminating {@code END} line; the footer is verified after
 * the {@code END} line has been consumed. Every doc id found on a {@code DOC} line is
 * set in the resulting bit set.
 *
 * @param dir directory the live-docs file is opened from
 * @param info segment commit info; must report deletions (checked by assertion only)
 * @param context IO context handed to {@code openChecksumInput}
 * @return a {@code SimpleTextBits} wrapping the parsed bit set and the size read from the file
 * @throws IOException if the file cannot be opened or read, or the footer check fails
 */
@Override
public Bits readLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context) throws IOException {
    assert info.hasDeletions();
    BytesRefBuilder scratch = new BytesRefBuilder();
    CharsRefBuilder scratchUTF16 = new CharsRefBuilder();
    String fileName = IndexFileNames.fileNameFromGeneration(info.info.name, LIVEDOCS_EXTENSION, info.getDelGen());
    // try-with-resources closes the input on every exit path. If the body fails and
    // close() fails as well, the close failure is attached as a suppressed exception
    // instead of masking the original one.
    try (ChecksumIndexInput in = dir.openChecksumInput(fileName, context)) {
        // Header line: total number of bits.
        SimpleTextUtil.readLine(in, scratch);
        assert StringHelper.startsWith(scratch.get(), SIZE);
        int size = parseIntAt(scratch.get(), SIZE.length, scratchUTF16);
        BitSet bits = new BitSet(size);
        // One DOC line per set bit, until the END marker is reached.
        SimpleTextUtil.readLine(in, scratch);
        while (!scratch.get().equals(END)) {
            assert StringHelper.startsWith(scratch.get(), DOC);
            int docid = parseIntAt(scratch.get(), DOC.length, scratchUTF16);
            bits.set(docid);
            SimpleTextUtil.readLine(in, scratch);
        }
        SimpleTextUtil.checkFooter(in);
        return new SimpleTextBits(bits, size);
    }
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) ChecksumIndexInput(org.apache.lucene.store.ChecksumIndexInput) BitSet(java.util.BitSet) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder)

Example 62 with BytesRefBuilder

use of org.apache.lucene.util.BytesRefBuilder in project lucene-solr by apache.

the class SimpleTextLiveDocsFormat method writeLiveDocs.

/**
 * Writes the given bits as a plain-text live-docs file for the segment's next deletion generation.
 *
 * <p>The output consists of a {@code SIZE} line holding {@code bits.length()}, one
 * {@code DOC} line per set bit in ascending order, an {@code END} line, and finally the checksum.
 *
 * @param bits bits to persist; cast to {@code SimpleTextBits} to reach the underlying bit set
 * @param dir directory the file is created in
 * @param info segment commit info supplying the segment name and next deletion generation
 * @param newDelCount not used by this implementation
 * @param context IO context handed to {@code createOutput}
 * @throws IOException if the file cannot be created or written
 */
@Override
public void writeLiveDocs(MutableBits bits, Directory dir, SegmentCommitInfo info, int newDelCount, IOContext context) throws IOException {
    BitSet set = ((SimpleTextBits) bits).bits;
    int size = bits.length();
    BytesRefBuilder scratch = new BytesRefBuilder();
    String fileName = IndexFileNames.fileNameFromGeneration(info.info.name, LIVEDOCS_EXTENSION, info.getNextDelGen());
    // try-with-resources closes the output on every exit path. If the body fails and
    // close() fails as well, the close failure is attached as a suppressed exception
    // instead of masking the original one.
    try (IndexOutput out = dir.createOutput(fileName, context)) {
        SimpleTextUtil.write(out, SIZE);
        SimpleTextUtil.write(out, Integer.toString(size), scratch);
        SimpleTextUtil.writeNewline(out);
        // Walk only the set bits; nextSetBit returns -1 once none remain.
        for (int i = set.nextSetBit(0); i >= 0; i = set.nextSetBit(i + 1)) {
            SimpleTextUtil.write(out, DOC);
            SimpleTextUtil.write(out, Integer.toString(i), scratch);
            SimpleTextUtil.writeNewline(out);
        }
        SimpleTextUtil.write(out, END);
        SimpleTextUtil.writeNewline(out);
        SimpleTextUtil.writeChecksum(out, scratch);
    }
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) BitSet(java.util.BitSet) IndexOutput(org.apache.lucene.store.IndexOutput)

Example 63 with BytesRefBuilder

use of org.apache.lucene.util.BytesRefBuilder in project lucene-solr by apache.

the class SimpleTextPointsReader method checkIntegrity.

/**
 * Verifies the data file by reading it line by line from the start through a
 * checksumming wrapper and then validating the footer.
 *
 * @throws CorruptIndexException if the line scan does not stop exactly at the expected footer start
 * @throws IOException if reading the file or checking the footer fails
 */
@Override
public void checkIntegrity() throws IOException {
    final BytesRefBuilder lineBuffer = new BytesRefBuilder();
    final IndexInput rewound = dataIn.clone();
    rewound.seek(0);
    // checksum is fixed-width encoded with 20 bytes, plus 1 byte for newline (the space is included in SimpleTextUtil.CHECKSUM):
    final long expectedFooterStart = dataIn.length() - (SimpleTextUtil.CHECKSUM.length + 21);
    final ChecksumIndexInput checksummed = new BufferedChecksumIndexInput(rewound);
    // Consume whole lines until the read position reaches (or passes) the footer.
    do {
        SimpleTextUtil.readLine(checksummed, lineBuffer);
    } while (checksummed.getFilePointer() < expectedFooterStart);
    // Make sure we landed at precisely the right location:
    if (checksummed.getFilePointer() != expectedFooterStart) {
        throw new CorruptIndexException("SimpleText failure: footer does not start at expected position current=" + checksummed.getFilePointer() + " vs expected=" + expectedFooterStart, checksummed);
    }
    SimpleTextUtil.checkFooter(checksummed);
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) ChecksumIndexInput(org.apache.lucene.store.ChecksumIndexInput) BufferedChecksumIndexInput(org.apache.lucene.store.BufferedChecksumIndexInput) IndexInput(org.apache.lucene.store.IndexInput) CorruptIndexException(org.apache.lucene.index.CorruptIndexException)

Example 64 with BytesRefBuilder

use of org.apache.lucene.util.BytesRefBuilder in project lucene-solr by apache.

the class SimpleTextSegmentInfoFormat method write.

/**
 * Serializes {@code si} into a plain-text segment-info file created in {@code dir}.
 *
 * <p>Lines are emitted in this order: version, minimum version (the text {@code null}
 * when absent), doc count, compound-file flag, diagnostics, attributes, file names,
 * segment id, the index-sort description (one group of lines per sort field), and
 * finally the checksum.
 *
 * @param dir directory the file is created in
 * @param si segment info to serialize; the created file name is registered on it via {@code addFile}
 * @param ioContext IO context handed to {@code createOutput}
 * @throws IOException if the file cannot be created or written
 * @throws IllegalStateException if a sort field uses a sort type or selector not handled here
 */
@Override
public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
    final String siFileName = IndexFileNames.segmentFileName(si.name, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
    try (IndexOutput out = dir.createOutput(siFileName, ioContext)) {
        // Only add the file once we've successfully created it, else IFD assert can trip:
        si.addFile(siFileName);
        final BytesRefBuilder buf = new BytesRefBuilder();

        // --- versions, doc count, compound flag ---
        SimpleTextUtil.write(out, SI_VERSION);
        SimpleTextUtil.write(out, si.getVersion().toString(), buf);
        SimpleTextUtil.writeNewline(out);

        SimpleTextUtil.write(out, SI_MIN_VERSION);
        SimpleTextUtil.write(out, si.getMinVersion() == null ? "null" : si.getMinVersion().toString(), buf);
        SimpleTextUtil.writeNewline(out);

        SimpleTextUtil.write(out, SI_DOCCOUNT);
        SimpleTextUtil.write(out, Integer.toString(si.maxDoc()), buf);
        SimpleTextUtil.writeNewline(out);

        SimpleTextUtil.write(out, SI_USECOMPOUND);
        SimpleTextUtil.write(out, Boolean.toString(si.getUseCompoundFile()), buf);
        SimpleTextUtil.writeNewline(out);

        // --- diagnostics (map may be null, which is written as a count of 0) ---
        final Map<String, String> diagnostics = si.getDiagnostics();
        final int diagCount = (diagnostics != null) ? diagnostics.size() : 0;
        SimpleTextUtil.write(out, SI_NUM_DIAG);
        SimpleTextUtil.write(out, Integer.toString(diagCount), buf);
        SimpleTextUtil.writeNewline(out);
        if (diagCount > 0) {
            for (Map.Entry<String, String> diag : diagnostics.entrySet()) {
                SimpleTextUtil.write(out, SI_DIAG_KEY);
                SimpleTextUtil.write(out, diag.getKey(), buf);
                SimpleTextUtil.writeNewline(out);

                SimpleTextUtil.write(out, SI_DIAG_VALUE);
                SimpleTextUtil.write(out, diag.getValue(), buf);
                SimpleTextUtil.writeNewline(out);
            }
        }

        // --- attributes ---
        final Map<String, String> attributes = si.getAttributes();
        SimpleTextUtil.write(out, SI_NUM_ATT);
        SimpleTextUtil.write(out, Integer.toString(attributes.size()), buf);
        SimpleTextUtil.writeNewline(out);
        for (Map.Entry<String, String> att : attributes.entrySet()) {
            SimpleTextUtil.write(out, SI_ATT_KEY);
            SimpleTextUtil.write(out, att.getKey(), buf);
            SimpleTextUtil.writeNewline(out);

            SimpleTextUtil.write(out, SI_ATT_VALUE);
            SimpleTextUtil.write(out, att.getValue(), buf);
            SimpleTextUtil.writeNewline(out);
        }

        // --- file names (set may be null, which is written as a count of 0) ---
        final Set<String> files = si.files();
        final int fileCount = (files != null) ? files.size() : 0;
        SimpleTextUtil.write(out, SI_NUM_FILES);
        SimpleTextUtil.write(out, Integer.toString(fileCount), buf);
        SimpleTextUtil.writeNewline(out);
        if (fileCount > 0) {
            for (String name : files) {
                SimpleTextUtil.write(out, SI_FILE);
                SimpleTextUtil.write(out, name, buf);
                SimpleTextUtil.writeNewline(out);
            }
        }

        // --- segment id ---
        SimpleTextUtil.write(out, SI_ID);
        SimpleTextUtil.write(out, new BytesRef(si.getId()));
        SimpleTextUtil.writeNewline(out);

        // --- index sort: field count, then one group of lines per sort field ---
        final Sort indexSort = si.getIndexSort();
        SimpleTextUtil.write(out, SI_SORT);
        final int sortFieldCount = (indexSort != null) ? indexSort.getSort().length : 0;
        SimpleTextUtil.write(out, Integer.toString(sortFieldCount), buf);
        SimpleTextUtil.writeNewline(out);

        for (int idx = 0; idx < sortFieldCount; idx++) {
            final SortField sortField = indexSort.getSort()[idx];
            final boolean isSortedSet = sortField instanceof SortedSetSortField;
            final boolean isSortedNumeric = !isSortedSet && sortField instanceof SortedNumericSortField;

            SimpleTextUtil.write(out, SI_SORT_FIELD);
            SimpleTextUtil.write(out, sortField.getField(), buf);
            SimpleTextUtil.writeNewline(out);

            SimpleTextUtil.write(out, SI_SORT_TYPE);
            // Sorted-set fields are written as multi-valued strings; sorted-numeric
            // fields are written as the multi-valued form of their numeric type.
            final SortField.Type effectiveType;
            if (isSortedSet) {
                effectiveType = SortField.Type.STRING;
            } else if (isSortedNumeric) {
                effectiveType = ((SortedNumericSortField) sortField).getNumericType();
            } else {
                effectiveType = sortField.getType();
            }
            final boolean multiValued = isSortedSet || isSortedNumeric;

            final String typeName;
            switch (effectiveType) {
                case STRING:
                    typeName = multiValued ? "multi_valued_string" : "string";
                    break;
                case LONG:
                    typeName = multiValued ? "multi_valued_long" : "long";
                    break;
                case INT:
                    typeName = multiValued ? "multi_valued_int" : "int";
                    break;
                case DOUBLE:
                    typeName = multiValued ? "multi_valued_double" : "double";
                    break;
                case FLOAT:
                    typeName = multiValued ? "multi_valued_float" : "float";
                    break;
                default:
                    throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
            }
            SimpleTextUtil.write(out, typeName, buf);
            SimpleTextUtil.writeNewline(out);

            // A selector line is only written for the two multi-valued field kinds.
            String selectorName = null;
            if (isSortedSet) {
                final SortedSetSelector.Type setSelector = ((SortedSetSortField) sortField).getSelector();
                if (setSelector == SortedSetSelector.Type.MIN) {
                    selectorName = "min";
                } else if (setSelector == SortedSetSelector.Type.MIDDLE_MIN) {
                    selectorName = "middle_min";
                } else if (setSelector == SortedSetSelector.Type.MIDDLE_MAX) {
                    selectorName = "middle_max";
                } else if (setSelector == SortedSetSelector.Type.MAX) {
                    selectorName = "max";
                } else {
                    throw new IllegalStateException("Unexpected SortedSetSelector type selector: " + setSelector);
                }
            } else if (isSortedNumeric) {
                final SortedNumericSelector.Type numericSelector = ((SortedNumericSortField) sortField).getSelector();
                if (numericSelector == SortedNumericSelector.Type.MIN) {
                    selectorName = "min";
                } else if (numericSelector == SortedNumericSelector.Type.MAX) {
                    selectorName = "max";
                } else {
                    throw new IllegalStateException("Unexpected SortedNumericSelector type selector: " + numericSelector);
                }
            }
            if (selectorName != null) {
                SimpleTextUtil.write(out, SI_SELECTOR_TYPE);
                SimpleTextUtil.write(out, selectorName, buf);
                SimpleTextUtil.writeNewline(out);
            }

            SimpleTextUtil.write(out, SI_SORT_REVERSE);
            SimpleTextUtil.write(out, Boolean.toString(sortField.getReverse()), buf);
            SimpleTextUtil.writeNewline(out);

            SimpleTextUtil.write(out, SI_SORT_MISSING);
            final Object missingValue = sortField.getMissingValue();
            final String missingText;
            if (missingValue == null) {
                missingText = "null";
            } else if (missingValue == SortField.STRING_FIRST) {
                // Identity comparison against the shared sentinel object.
                missingText = "first";
            } else if (missingValue == SortField.STRING_LAST) {
                missingText = "last";
            } else {
                missingText = missingValue.toString();
            }
            SimpleTextUtil.write(out, missingText, buf);
            SimpleTextUtil.writeNewline(out);
        }

        SimpleTextUtil.writeChecksum(out, buf);
    }
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) IndexOutput(org.apache.lucene.store.IndexOutput) SortField(org.apache.lucene.search.SortField) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) Sort(org.apache.lucene.search.Sort) SortedSetSelector(org.apache.lucene.search.SortedSetSelector) HashMap(java.util.HashMap) Map(java.util.Map) BytesRef(org.apache.lucene.util.BytesRef)

Example 65 with BytesRefBuilder

use of org.apache.lucene.util.BytesRefBuilder in project lucene-solr by apache.

the class SimpleTextTermVectorsReader method get.

/**
 * Loads the term vectors of one document from the plain-text input.
 *
 * <p>Seeks to the document's recorded offset, reads the field count, and then for each
 * field reads its header lines (number, name, positions/offsets/payloads flags, term
 * count) followed by every term with its frequency and, when enabled, per-occurrence
 * positions, payloads and start/end offsets.
 *
 * @param doc document number, used as an index into the recorded offsets
 * @return the parsed fields, or {@code null} when the document declares zero fields
 * @throws IOException if seeking or reading the input fails
 */
@Override
public Fields get(int doc) throws IOException {
    in.seek(offsets[doc]);
    readLine();
    assert StringHelper.startsWith(scratch.get(), NUMFIELDS);
    final int fieldCount = parseIntAt(NUMFIELDS.length);
    if (fieldCount == 0) {
        // no vectors for this doc
        return null;
    }

    final SortedMap<String, SimpleTVTerms> fieldsByName = new TreeMap<>();
    for (int fieldIdx = 0; fieldIdx < fieldCount; fieldIdx++) {
        readLine();
        assert StringHelper.startsWith(scratch.get(), FIELD);
        // skip fieldNumber:
        parseIntAt(FIELD.length);

        readLine();
        assert StringHelper.startsWith(scratch.get(), FIELDNAME);
        final String fieldName = readString(FIELDNAME.length, scratch);

        readLine();
        assert StringHelper.startsWith(scratch.get(), FIELDPOSITIONS);
        final boolean hasPositions = Boolean.parseBoolean(readString(FIELDPOSITIONS.length, scratch));

        readLine();
        assert StringHelper.startsWith(scratch.get(), FIELDOFFSETS);
        final boolean hasOffsets = Boolean.parseBoolean(readString(FIELDOFFSETS.length, scratch));

        readLine();
        assert StringHelper.startsWith(scratch.get(), FIELDPAYLOADS);
        final boolean hasPayloads = Boolean.parseBoolean(readString(FIELDPAYLOADS.length, scratch));

        readLine();
        assert StringHelper.startsWith(scratch.get(), FIELDTERMCOUNT);
        final int termCount = parseIntAt(FIELDTERMCOUNT.length);

        final SimpleTVTerms terms = new SimpleTVTerms(hasOffsets, hasPositions, hasPayloads);
        fieldsByName.put(fieldName, terms);

        // Reused for every term of this field; the map key comes from toBytesRef().
        final BytesRefBuilder term = new BytesRefBuilder();
        for (int termIdx = 0; termIdx < termCount; termIdx++) {
            readLine();
            assert StringHelper.startsWith(scratch.get(), TERMTEXT);
            // The term bytes are whatever follows the TERMTEXT prefix on this line.
            final int termLength = scratch.length() - TERMTEXT.length;
            term.grow(termLength);
            term.setLength(termLength);
            System.arraycopy(scratch.bytes(), TERMTEXT.length, term.bytes(), 0, termLength);

            final SimpleTVPostings postings = new SimpleTVPostings();
            terms.terms.put(term.toBytesRef(), postings);

            readLine();
            assert StringHelper.startsWith(scratch.get(), TERMFREQ);
            final int freq = parseIntAt(TERMFREQ.length);
            postings.freq = freq;

            if (!hasPositions && !hasOffsets) {
                // Nothing is stored per occurrence for this field.
                continue;
            }

            if (hasPositions) {
                postings.positions = new int[freq];
                if (hasPayloads) {
                    postings.payloads = new BytesRef[freq];
                }
            }
            if (hasOffsets) {
                postings.startOffsets = new int[freq];
                postings.endOffsets = new int[freq];
            }

            for (int occ = 0; occ < freq; occ++) {
                if (hasPositions) {
                    readLine();
                    assert StringHelper.startsWith(scratch.get(), POSITION);
                    postings.positions[occ] = parseIntAt(POSITION.length);

                    if (hasPayloads) {
                        readLine();
                        assert StringHelper.startsWith(scratch.get(), PAYLOAD);
                        final int payloadLength = scratch.length() - PAYLOAD.length;
                        if (payloadLength == 0) {
                            // An empty payload line is stored as null.
                            postings.payloads[occ] = null;
                        } else {
                            final byte[] payloadBytes = new byte[payloadLength];
                            System.arraycopy(scratch.bytes(), PAYLOAD.length, payloadBytes, 0, payloadBytes.length);
                            postings.payloads[occ] = new BytesRef(payloadBytes);
                        }
                    }
                }
                if (hasOffsets) {
                    readLine();
                    assert StringHelper.startsWith(scratch.get(), STARTOFFSET);
                    postings.startOffsets[occ] = parseIntAt(STARTOFFSET.length);

                    readLine();
                    assert StringHelper.startsWith(scratch.get(), ENDOFFSET);
                    postings.endOffsets[occ] = parseIntAt(ENDOFFSET.length);
                }
            }
        }
    }
    return new SimpleTVFields(fieldsByName);
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) TreeMap(java.util.TreeMap) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder): 150; BytesRef (org.apache.lucene.util.BytesRef): 79; ArrayList (java.util.ArrayList): 21; IOException (java.io.IOException): 17; Term (org.apache.lucene.index.Term): 16; HashSet (java.util.HashSet): 15; ChecksumIndexInput (org.apache.lucene.store.ChecksumIndexInput): 14; FieldType (org.apache.solr.schema.FieldType): 14; IndexInput (org.apache.lucene.store.IndexInput): 12; BytesRefIterator (org.apache.lucene.util.BytesRefIterator): 10; CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder): 10; IntsRef (org.apache.lucene.util.IntsRef): 10; SchemaField (org.apache.solr.schema.SchemaField): 10; BufferedChecksumIndexInput (org.apache.lucene.store.BufferedChecksumIndexInput): 9; ParseException (java.text.ParseException): 8; IntsRefBuilder (org.apache.lucene.util.IntsRefBuilder): 8; DecimalFormat (java.text.DecimalFormat): 7; HashMap (java.util.HashMap): 7; Map (java.util.Map): 7; Directory (org.apache.lucene.store.Directory): 7