
Example 1 with DocValuesType

Use of org.apache.lucene.index.DocValuesType in project lucene-solr by apache.

In the class SolrDocumentFetcher, the method decorateDocValueFields:

/**
   * This will fetch and add the docValues fields to a given SolrDocument/SolrInputDocument
   *
   * @param doc
   *          A SolrDocument or SolrInputDocument instance where docValues will be added
   * @param docid
   *          The lucene docid of the document to be populated
   * @param fields
   *          The list of docValues fields to be decorated
   */
public void decorateDocValueFields(@SuppressWarnings("rawtypes") SolrDocumentBase doc, int docid, Set<String> fields) throws IOException {
    final List<LeafReaderContext> leafContexts = searcher.getLeafContexts();
    final int subIndex = ReaderUtil.subIndex(docid, leafContexts);
    final int localId = docid - leafContexts.get(subIndex).docBase;
    final LeafReader leafReader = leafContexts.get(subIndex).reader();
    for (String fieldName : fields) {
        final SchemaField schemaField = searcher.getSchema().getFieldOrNull(fieldName);
        if (schemaField == null || !schemaField.hasDocValues() || doc.containsKey(fieldName)) {
            log.warn("Couldn't decorate docValues for field: [{}], schemaField: [{}]", fieldName, schemaField);
            continue;
        }
        FieldInfo fi = searcher.getFieldInfos().fieldInfo(fieldName);
        if (fi == null) {
            // Searcher doesn't have info about this field, hence ignore it.
            continue;
        }
        final DocValuesType dvType = fi.getDocValuesType();
        switch(dvType) {
            case NUMERIC:
                final NumericDocValues ndv = leafReader.getNumericDocValues(fieldName);
                if (ndv == null) {
                    continue;
                }
                Long val;
                if (ndv.advanceExact(localId)) {
                    val = ndv.longValue();
                } else {
                    continue;
                }
                Object newVal = val;
                if (schemaField.getType().isPointField()) {
                    // TODO: Maybe merge PointField with TrieFields here
                    NumberType type = schemaField.getType().getNumberType();
                    switch(type) {
                        case INTEGER:
                            newVal = val.intValue();
                            break;
                        case LONG:
                            newVal = val.longValue();
                            break;
                        case FLOAT:
                            newVal = Float.intBitsToFloat(val.intValue());
                            break;
                        case DOUBLE:
                            newVal = Double.longBitsToDouble(val);
                            break;
                        case DATE:
                            newVal = new Date(val);
                            break;
                        default:
                            throw new AssertionError("Unexpected PointType: " + type);
                    }
                } else {
                    if (schemaField.getType() instanceof TrieIntField) {
                        newVal = val.intValue();
                    } else if (schemaField.getType() instanceof TrieFloatField) {
                        newVal = Float.intBitsToFloat(val.intValue());
                    } else if (schemaField.getType() instanceof TrieDoubleField) {
                        newVal = Double.longBitsToDouble(val);
                    } else if (schemaField.getType() instanceof TrieDateField) {
                        newVal = new Date(val);
                    } else if (schemaField.getType() instanceof EnumField) {
                        newVal = ((EnumField) schemaField.getType()).intValueToStringValue(val.intValue());
                    }
                }
                doc.addField(fieldName, newVal);
                break;
            case BINARY:
                BinaryDocValues bdv = leafReader.getBinaryDocValues(fieldName);
                if (bdv == null) {
                    continue;
                }
                BytesRef value;
                if (bdv.advanceExact(localId)) {
                    value = BytesRef.deepCopyOf(bdv.binaryValue());
                } else {
                    continue;
                }
                doc.addField(fieldName, value);
                break;
            case SORTED:
                SortedDocValues sdv = leafReader.getSortedDocValues(fieldName);
                if (sdv == null) {
                    continue;
                }
                if (sdv.advanceExact(localId)) {
                    final BytesRef bRef = sdv.binaryValue();
                    // Special handling for Boolean fields since they're stored as 'T' and 'F'.
                    if (schemaField.getType() instanceof BoolField) {
                        doc.addField(fieldName, schemaField.getType().toObject(schemaField, bRef));
                    } else {
                        doc.addField(fieldName, bRef.utf8ToString());
                    }
                }
                break;
            case SORTED_NUMERIC:
                final SortedNumericDocValues numericDv = leafReader.getSortedNumericDocValues(fieldName);
                NumberType type = schemaField.getType().getNumberType();
                if (numericDv != null) {
                    if (numericDv.advance(localId) == localId) {
                        final List<Object> outValues = new ArrayList<Object>(numericDv.docValueCount());
                        for (int i = 0; i < numericDv.docValueCount(); i++) {
                            long number = numericDv.nextValue();
                            switch(type) {
                                case INTEGER:
                                    outValues.add((int) number);
                                    break;
                                case LONG:
                                    outValues.add(number);
                                    break;
                                case FLOAT:
                                    outValues.add(NumericUtils.sortableIntToFloat((int) number));
                                    break;
                                case DOUBLE:
                                    outValues.add(NumericUtils.sortableLongToDouble(number));
                                    break;
                                case DATE:
                                    outValues.add(new Date(number));
                                    break;
                                default:
                                    throw new AssertionError("Unexpected PointType: " + type);
                            }
                        }
                        assert outValues.size() > 0;
                        doc.addField(fieldName, outValues);
                    }
                }
                break;
            case SORTED_SET:
                final SortedSetDocValues values = leafReader.getSortedSetDocValues(fieldName);
                if (values != null && values.getValueCount() > 0) {
                    if (values.advance(localId) == localId) {
                        final List<Object> outValues = new LinkedList<>();
                        for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
                            value = values.lookupOrd(ord);
                            outValues.add(schemaField.getType().toObject(schemaField, value));
                        }
                        assert outValues.size() > 0;
                        doc.addField(fieldName, outValues);
                    }
                }
                break;
            case NONE:
                break;
        }
    }
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) TrieIntField(org.apache.solr.schema.TrieIntField) ArrayList(java.util.ArrayList) TrieDateField(org.apache.solr.schema.TrieDateField) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) DocValuesType(org.apache.lucene.index.DocValuesType) TrieFloatField(org.apache.solr.schema.TrieFloatField) BytesRef(org.apache.lucene.util.BytesRef) TrieDoubleField(org.apache.solr.schema.TrieDoubleField) EnumField(org.apache.solr.schema.EnumField) BoolField(org.apache.solr.schema.BoolField) LeafReader(org.apache.lucene.index.LeafReader) Date(java.util.Date) SortedDocValues(org.apache.lucene.index.SortedDocValues) LinkedList(java.util.LinkedList) SchemaField(org.apache.solr.schema.SchemaField) NumberType(org.apache.solr.schema.NumberType) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) FieldInfo(org.apache.lucene.index.FieldInfo)
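
For orientation, here is a minimal, self-contained sketch of the same pattern outside of Solr: look up the field's FieldInfo on a LeafReader, switch on its DocValuesType, and read a single value with advanceExact. The class and method names below are invented for illustration; only the Lucene calls mirror the example above, and the non-NUMERIC/SORTED cases are deliberately omitted.

// A minimal sketch (not part of the Solr source above): per-field dispatch on
// FieldInfo.getDocValuesType(), reduced to a single-valued NUMERIC/SORTED read
// for one segment-local doc id. Class and method names are illustrative only.
import java.io.IOException;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;

public final class DocValuesReadSketch {

    /** Returns the doc's value for the given field, or null if the doc has no value. */
    public static Object readSingleValue(LeafReader leafReader, int localId, String field) throws IOException {
        FieldInfo fi = leafReader.getFieldInfos().fieldInfo(field);
        if (fi == null) {
            return null; // this segment has no such field
        }
        switch (fi.getDocValuesType()) {
            case NUMERIC:
                NumericDocValues ndv = leafReader.getNumericDocValues(field);
                if (ndv != null && ndv.advanceExact(localId)) {
                    return ndv.longValue();
                }
                return null;
            case SORTED:
                SortedDocValues sdv = leafReader.getSortedDocValues(field);
                if (sdv != null && sdv.advanceExact(localId)) {
                    return sdv.binaryValue().utf8ToString();
                }
                return null;
            default:
                return null; // BINARY, SORTED_NUMERIC, SORTED_SET, NONE omitted in this sketch
        }
    }
}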

Example 2 with DocValuesType

Use of org.apache.lucene.index.DocValuesType in project lucene-solr by apache.

In the class AllGroupHeadsCollectorTest, the method testBasic:

public void testBasic() throws Exception {
    final String groupField = "author";
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
    DocValuesType valueType = DocValuesType.SORTED;
    // 0
    Document doc = new Document();
    addGroupField(doc, groupField, "author1", valueType);
    doc.add(newTextField("content", "random text", Field.Store.NO));
    doc.add(new NumericDocValuesField("id_1", 1));
    doc.add(new SortedDocValuesField("id_2", new BytesRef("1")));
    w.addDocument(doc);
    // 1
    doc = new Document();
    addGroupField(doc, groupField, "author1", valueType);
    doc.add(newTextField("content", "some more random text blob", Field.Store.NO));
    doc.add(new NumericDocValuesField("id_1", 2));
    doc.add(new SortedDocValuesField("id_2", new BytesRef("2")));
    w.addDocument(doc);
    // 2
    doc = new Document();
    addGroupField(doc, groupField, "author1", valueType);
    doc.add(newTextField("content", "some more random textual data", Field.Store.NO));
    doc.add(new NumericDocValuesField("id_1", 3));
    doc.add(new SortedDocValuesField("id_2", new BytesRef("3")));
    w.addDocument(doc);
    // To ensure a second segment
    w.commit();
    // 3
    doc = new Document();
    addGroupField(doc, groupField, "author2", valueType);
    doc.add(newTextField("content", "some random text", Field.Store.NO));
    doc.add(new NumericDocValuesField("id_1", 4));
    doc.add(new SortedDocValuesField("id_2", new BytesRef("4")));
    w.addDocument(doc);
    // 4
    doc = new Document();
    addGroupField(doc, groupField, "author3", valueType);
    doc.add(newTextField("content", "some more random text", Field.Store.NO));
    doc.add(new NumericDocValuesField("id_1", 5));
    doc.add(new SortedDocValuesField("id_2", new BytesRef("5")));
    w.addDocument(doc);
    // 5
    doc = new Document();
    addGroupField(doc, groupField, "author3", valueType);
    doc.add(newTextField("content", "random blob", Field.Store.NO));
    doc.add(new NumericDocValuesField("id_1", 6));
    doc.add(new SortedDocValuesField("id_2", new BytesRef("6")));
    w.addDocument(doc);
    // 6 -- no author field
    doc = new Document();
    doc.add(newTextField("content", "random word stuck in alot of other text", Field.Store.NO));
    doc.add(new NumericDocValuesField("id_1", 6));
    doc.add(new SortedDocValuesField("id_2", new BytesRef("6")));
    w.addDocument(doc);
    // 7 -- no author field
    doc = new Document();
    doc.add(newTextField("content", "random word stuck in alot of other text", Field.Store.NO));
    doc.add(new NumericDocValuesField("id_1", 7));
    doc.add(new SortedDocValuesField("id_2", new BytesRef("7")));
    w.addDocument(doc);
    IndexReader reader = w.getReader();
    IndexSearcher indexSearcher = newSearcher(reader);
    w.close();
    int maxDoc = reader.maxDoc();
    Sort sortWithinGroup = new Sort(new SortField("id_1", SortField.Type.INT, true));
    AllGroupHeadsCollector<?> allGroupHeadsCollector = createRandomCollector(groupField, sortWithinGroup);
    indexSearcher.search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector);
    assertTrue(arrayContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.retrieveGroupHeads()));
    assertTrue(openBitSetContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.retrieveGroupHeads(maxDoc), maxDoc));
    allGroupHeadsCollector = createRandomCollector(groupField, sortWithinGroup);
    indexSearcher.search(new TermQuery(new Term("content", "some")), allGroupHeadsCollector);
    assertTrue(arrayContains(new int[] { 2, 3, 4 }, allGroupHeadsCollector.retrieveGroupHeads()));
    assertTrue(openBitSetContains(new int[] { 2, 3, 4 }, allGroupHeadsCollector.retrieveGroupHeads(maxDoc), maxDoc));
    allGroupHeadsCollector = createRandomCollector(groupField, sortWithinGroup);
    indexSearcher.search(new TermQuery(new Term("content", "blob")), allGroupHeadsCollector);
    assertTrue(arrayContains(new int[] { 1, 5 }, allGroupHeadsCollector.retrieveGroupHeads()));
    assertTrue(openBitSetContains(new int[] { 1, 5 }, allGroupHeadsCollector.retrieveGroupHeads(maxDoc), maxDoc));
    // STRING sort type triggers different implementation
    Sort sortWithinGroup2 = new Sort(new SortField("id_2", SortField.Type.STRING, true));
    allGroupHeadsCollector = createRandomCollector(groupField, sortWithinGroup2);
    indexSearcher.search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector);
    assertTrue(arrayContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.retrieveGroupHeads()));
    assertTrue(openBitSetContains(new int[] { 2, 3, 5, 7 }, allGroupHeadsCollector.retrieveGroupHeads(maxDoc), maxDoc));
    Sort sortWithinGroup3 = new Sort(new SortField("id_2", SortField.Type.STRING, false));
    allGroupHeadsCollector = createRandomCollector(groupField, sortWithinGroup3);
    indexSearcher.search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector);
    // 7 b/c the higher doc id wins, even if the order of the field is not in reverse.
    assertTrue(arrayContains(new int[] { 0, 3, 4, 6 }, allGroupHeadsCollector.retrieveGroupHeads()));
    assertTrue(openBitSetContains(new int[] { 0, 3, 4, 6 }, allGroupHeadsCollector.retrieveGroupHeads(maxDoc), maxDoc));
    indexSearcher.getIndexReader().close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) SortField(org.apache.lucene.search.SortField) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) IndexReader(org.apache.lucene.index.IndexReader) DocValuesType(org.apache.lucene.index.DocValuesType) Sort(org.apache.lucene.search.Sort) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)
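
The test above relies on an addGroupField helper that the excerpt does not include. A plausible sketch, assuming the helper simply wraps the group value in a doc-values field of the requested DocValuesType (the test only passes SORTED here), is shown below; it would live in the same test class, and the Field/BinaryDocValuesField imports are assumed.

// Plausible sketch of the addGroupField helper used above (not shown in the excerpt);
// it presumably adds the group value as a doc-values field of the requested type.
private void addGroupField(Document doc, String groupField, String value, DocValuesType valueType) {
    Field valuesField = null;
    switch (valueType) {
        case BINARY:
            valuesField = new BinaryDocValuesField(groupField, new BytesRef(value));
            break;
        case SORTED:
            valuesField = new SortedDocValuesField(groupField, new BytesRef(value));
            break;
        default:
            throw new AssertionError("unhandled type: " + valueType);
    }
    doc.add(valuesField);
}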

Example 3 with DocValuesType

Use of org.apache.lucene.index.DocValuesType in project lucene-solr by apache.

In the class AllGroupHeadsCollectorTest, the method testRandom:

public void testRandom() throws Exception {
    int numberOfRuns = TestUtil.nextInt(random(), 3, 6);
    for (int iter = 0; iter < numberOfRuns; iter++) {
        if (VERBOSE) {
            System.out.println(String.format(Locale.ROOT, "TEST: iter=%d total=%d", iter, numberOfRuns));
        }
        final int numDocs = TestUtil.nextInt(random(), 100, 1000) * RANDOM_MULTIPLIER;
        final int numGroups = TestUtil.nextInt(random(), 1, numDocs);
        if (VERBOSE) {
            System.out.println("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
        }
        final List<BytesRef> groups = new ArrayList<>();
        for (int i = 0; i < numGroups; i++) {
            String randomValue;
            do {
                // B/c of DV based impl we can't see the difference between an empty string and a null value.
                // For that reason we don't generate empty string groups.
                randomValue = TestUtil.randomRealisticUnicodeString(random());
            //randomValue = TestUtil.randomSimpleString(random());
            } while ("".equals(randomValue));
            groups.add(new BytesRef(randomValue));
        }
        final String[] contentStrings = new String[TestUtil.nextInt(random(), 2, 20)];
        if (VERBOSE) {
            System.out.println("TEST: create fake content");
        }
        for (int contentIDX = 0; contentIDX < contentStrings.length; contentIDX++) {
            final StringBuilder sb = new StringBuilder();
            sb.append("real").append(random().nextInt(3)).append(' ');
            final int fakeCount = random().nextInt(10);
            for (int fakeIDX = 0; fakeIDX < fakeCount; fakeIDX++) {
                sb.append("fake ");
            }
            contentStrings[contentIDX] = sb.toString();
            if (VERBOSE) {
                System.out.println("  content=" + sb.toString());
            }
        }
        Directory dir = newDirectory();
        RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())));
        DocValuesType valueType = DocValuesType.SORTED;
        Document doc = new Document();
        Document docNoGroup = new Document();
        Field valuesField = new SortedDocValuesField("group", new BytesRef());
        doc.add(valuesField);
        Field sort1 = new SortedDocValuesField("sort1", new BytesRef());
        doc.add(sort1);
        docNoGroup.add(sort1);
        Field sort2 = new SortedDocValuesField("sort2", new BytesRef());
        doc.add(sort2);
        docNoGroup.add(sort2);
        Field sort3 = new SortedDocValuesField("sort3", new BytesRef());
        doc.add(sort3);
        docNoGroup.add(sort3);
        Field content = newTextField("content", "", Field.Store.NO);
        doc.add(content);
        docNoGroup.add(content);
        NumericDocValuesField idDV = new NumericDocValuesField("id", 0);
        doc.add(idDV);
        docNoGroup.add(idDV);
        final GroupDoc[] groupDocs = new GroupDoc[numDocs];
        for (int i = 0; i < numDocs; i++) {
            final BytesRef groupValue;
            if (random().nextInt(24) == 17) {
                // So we test the "doc doesn't have the grouped field" case:
                groupValue = null;
            } else {
                groupValue = groups.get(random().nextInt(groups.size()));
            }
            final GroupDoc groupDoc = new GroupDoc(i, groupValue, groups.get(random().nextInt(groups.size())), groups.get(random().nextInt(groups.size())), new BytesRef(String.format(Locale.ROOT, "%05d", i)), contentStrings[random().nextInt(contentStrings.length)]);
            if (VERBOSE) {
                System.out.println("  doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group.utf8ToString()) + " sort1=" + groupDoc.sort1.utf8ToString() + " sort2=" + groupDoc.sort2.utf8ToString() + " sort3=" + groupDoc.sort3.utf8ToString());
            }
            groupDocs[i] = groupDoc;
            if (groupDoc.group != null) {
                valuesField.setBytesValue(new BytesRef(groupDoc.group.utf8ToString()));
            }
            sort1.setBytesValue(groupDoc.sort1);
            sort2.setBytesValue(groupDoc.sort2);
            sort3.setBytesValue(groupDoc.sort3);
            content.setStringValue(groupDoc.content);
            idDV.setLongValue(groupDoc.id);
            if (groupDoc.group == null) {
                w.addDocument(docNoGroup);
            } else {
                w.addDocument(doc);
            }
        }
        final DirectoryReader r = w.getReader();
        w.close();
        NumericDocValues values = MultiDocValues.getNumericValues(r, "id");
        final int[] docIDToFieldId = new int[numDocs];
        final int[] fieldIdToDocID = new int[numDocs];
        for (int i = 0; i < numDocs; i++) {
            assertEquals(i, values.nextDoc());
            int fieldId = (int) values.longValue();
            docIDToFieldId[i] = fieldId;
            fieldIdToDocID[fieldId] = i;
        }
        final IndexSearcher s = newSearcher(r);
        Set<Integer> seenIDs = new HashSet<>();
        for (int contentID = 0; contentID < 3; contentID++) {
            final ScoreDoc[] hits = s.search(new TermQuery(new Term("content", "real" + contentID)), numDocs).scoreDocs;
            for (ScoreDoc hit : hits) {
                int idValue = docIDToFieldId[hit.doc];
                final GroupDoc gd = groupDocs[idValue];
                assertEquals(gd.id, idValue);
                seenIDs.add(idValue);
                assertTrue(gd.score == 0.0);
                gd.score = hit.score;
            }
        }
        // make sure all groups were seen across the hits
        assertEquals(groupDocs.length, seenIDs.size());
        // make sure scores are sane
        for (GroupDoc gd : groupDocs) {
            assertTrue(Float.isFinite(gd.score));
            assertTrue(gd.score >= 0.0);
        }
        for (int searchIter = 0; searchIter < 100; searchIter++) {
            if (VERBOSE) {
                System.out.println("TEST: searchIter=" + searchIter);
            }
            final String searchTerm = "real" + random().nextInt(3);
            boolean sortByScoreOnly = random().nextBoolean();
            Sort sortWithinGroup = getRandomSort(sortByScoreOnly);
            AllGroupHeadsCollector<?> allGroupHeadsCollector = createRandomCollector("group", sortWithinGroup);
            s.search(new TermQuery(new Term("content", searchTerm)), allGroupHeadsCollector);
            int[] expectedGroupHeads = createExpectedGroupHeads(searchTerm, groupDocs, sortWithinGroup, sortByScoreOnly, fieldIdToDocID);
            int[] actualGroupHeads = allGroupHeadsCollector.retrieveGroupHeads();
            // The actual group heads contain Lucene doc ids; convert them into our id values.
            for (int i = 0; i < actualGroupHeads.length; i++) {
                actualGroupHeads[i] = docIDToFieldId[actualGroupHeads[i]];
            }
            // Sorting allows us to easily iterate over and compare the actual and expected results.
            Arrays.sort(expectedGroupHeads);
            Arrays.sort(actualGroupHeads);
            if (VERBOSE) {
                System.out.println("Collector: " + allGroupHeadsCollector.getClass().getSimpleName());
                System.out.println("Sort within group: " + sortWithinGroup);
                System.out.println("Num group: " + numGroups);
                System.out.println("Num doc: " + numDocs);
                System.out.println("\n=== Expected: \n");
                for (int expectedDocId : expectedGroupHeads) {
                    GroupDoc expectedGroupDoc = groupDocs[expectedDocId];
                    String expectedGroup = expectedGroupDoc.group == null ? null : expectedGroupDoc.group.utf8ToString();
                    System.out.println(String.format(Locale.ROOT, "Group:%10s score%5f Sort1:%10s Sort2:%10s Sort3:%10s doc:%5d", expectedGroup, expectedGroupDoc.score, expectedGroupDoc.sort1.utf8ToString(), expectedGroupDoc.sort2.utf8ToString(), expectedGroupDoc.sort3.utf8ToString(), expectedDocId));
                }
                System.out.println("\n=== Actual: \n");
                for (int actualDocId : actualGroupHeads) {
                    GroupDoc actualGroupDoc = groupDocs[actualDocId];
                    String actualGroup = actualGroupDoc.group == null ? null : actualGroupDoc.group.utf8ToString();
                    System.out.println(String.format(Locale.ROOT, "Group:%10s score%5f Sort1:%10s Sort2:%10s Sort3:%10s doc:%5d", actualGroup, actualGroupDoc.score, actualGroupDoc.sort1.utf8ToString(), actualGroupDoc.sort2.utf8ToString(), actualGroupDoc.sort3.utf8ToString(), actualDocId));
                }
                System.out.println("\n===================================================================================");
            }
            assertArrayEquals(expectedGroupHeads, actualGroupHeads);
        }
        r.close();
        dir.close();
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) NumericDocValues(org.apache.lucene.index.NumericDocValues) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) SortField(org.apache.lucene.search.SortField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Field(org.apache.lucene.document.Field) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) DocValuesType(org.apache.lucene.index.DocValuesType) Sort(org.apache.lucene.search.Sort) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet) TermQuery(org.apache.lucene.search.TermQuery) DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)
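
The GroupDoc holder used throughout this test is likewise not shown. Inferred from the constructor call and the field accesses above (id, group, sort1, sort2, sort3, content, and a mutable score), it is most likely a plain value class along these lines; the exact shape is an assumption.

// Likely shape of the GroupDoc holder, inferred from its usage above (constructor with
// id, group, sort1, sort2, sort3, content; a mutable score set from the search hits).
private static class GroupDoc {
    final int id;
    final BytesRef group;   // null when the doc has no group field
    final BytesRef sort1;
    final BytesRef sort2;
    final BytesRef sort3;
    final String content;
    float score;

    GroupDoc(int id, BytesRef group, BytesRef sort1, BytesRef sort2, BytesRef sort3, String content) {
        this.id = id;
        this.group = group;
        this.sort1 = sort1;
        this.sort2 = sort2;
        this.sort3 = sort3;
        this.content = content;
    }
}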

Example 4 with DocValuesType

Use of org.apache.lucene.index.DocValuesType in project lucene-solr by apache.

In the class Lucene50FieldInfosFormat, the method read:

@Override
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext context) throws IOException {
    final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, EXTENSION);
    try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) {
        Throwable priorE = null;
        FieldInfo[] infos = null;
        try {
            CodecUtil.checkIndexHeader(input, Lucene50FieldInfosFormat.CODEC_NAME, Lucene50FieldInfosFormat.FORMAT_START, Lucene50FieldInfosFormat.FORMAT_CURRENT, segmentInfo.getId(), segmentSuffix);
            // read in the size
            final int size = input.readVInt();
            infos = new FieldInfo[size];
            // previous field's attribute map, we share when possible:
            Map<String, String> lastAttributes = Collections.emptyMap();
            for (int i = 0; i < size; i++) {
                String name = input.readString();
                final int fieldNumber = input.readVInt();
                if (fieldNumber < 0) {
                    throw new CorruptIndexException("invalid field number for field: " + name + ", fieldNumber=" + fieldNumber, input);
                }
                byte bits = input.readByte();
                boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
                boolean omitNorms = (bits & OMIT_NORMS) != 0;
                boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
                final IndexOptions indexOptions = getIndexOptions(input, input.readByte());
                // DV Types are packed in one byte
                final DocValuesType docValuesType = getDocValuesType(input, input.readByte());
                final long dvGen = input.readLong();
                Map<String, String> attributes = input.readMapOfStrings();
                // just use the last field's map if it's the same
                if (attributes.equals(lastAttributes)) {
                    attributes = lastAttributes;
                }
                lastAttributes = attributes;
                try {
                    infos[i] = new FieldInfo(name, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValuesType, dvGen, attributes, 0, 0);
                    infos[i].checkConsistency();
                } catch (IllegalStateException e) {
                    throw new CorruptIndexException("invalid fieldinfo for field: " + name + ", fieldNumber=" + fieldNumber, input, e);
                }
            }
        } catch (Throwable exception) {
            priorE = exception;
        } finally {
            CodecUtil.checkFooter(input, priorE);
        }
        return new FieldInfos(infos);
    }
}
Also used : ChecksumIndexInput(org.apache.lucene.store.ChecksumIndexInput) IndexOptions(org.apache.lucene.index.IndexOptions) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) FieldInfos(org.apache.lucene.index.FieldInfos) DocValuesType(org.apache.lucene.index.DocValuesType) FieldInfo(org.apache.lucene.index.FieldInfo)
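
The getDocValuesType helper referenced above is not part of the excerpt. Since the doc-values type is packed into a single byte, the decoder is presumably a small byte-to-enum switch of the following shape, sitting in the same class; the exact byte-to-type mapping is an assumption, not copied from the Lucene50 source.

// Presumed decoder for the packed doc-values byte read above; the byte-to-type
// mapping shown here is an assumption about the Lucene50 on-disk format.
private static DocValuesType getDocValuesType(IndexInput input, byte b) throws IOException {
    switch (b) {
        case 0:
            return DocValuesType.NONE;
        case 1:
            return DocValuesType.NUMERIC;
        case 2:
            return DocValuesType.BINARY;
        case 3:
            return DocValuesType.SORTED;
        case 4:
            return DocValuesType.SORTED_SET;
        case 5:
            return DocValuesType.SORTED_NUMERIC;
        default:
            throw new CorruptIndexException("invalid docvalues byte: " + b, input);
    }
}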

Example 5 with DocValuesType

Use of org.apache.lucene.index.DocValuesType in project lucene-solr by apache.

In the class SimpleTextFieldInfosFormat, the method read:

@Override
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) throws IOException {
    final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, FIELD_INFOS_EXTENSION);
    ChecksumIndexInput input = directory.openChecksumInput(fileName, iocontext);
    BytesRefBuilder scratch = new BytesRefBuilder();
    boolean success = false;
    try {
        SimpleTextUtil.readLine(input, scratch);
        assert StringHelper.startsWith(scratch.get(), NUMFIELDS);
        final int size = Integer.parseInt(readString(NUMFIELDS.length, scratch));
        FieldInfo[] infos = new FieldInfo[size];
        for (int i = 0; i < size; i++) {
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), NAME);
            String name = readString(NAME.length, scratch);
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), NUMBER);
            int fieldNumber = Integer.parseInt(readString(NUMBER.length, scratch));
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), INDEXOPTIONS);
            String s = readString(INDEXOPTIONS.length, scratch);
            final IndexOptions indexOptions = IndexOptions.valueOf(s);
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), STORETV);
            boolean storeTermVector = Boolean.parseBoolean(readString(STORETV.length, scratch));
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), PAYLOADS);
            boolean storePayloads = Boolean.parseBoolean(readString(PAYLOADS.length, scratch));
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), NORMS);
            boolean omitNorms = !Boolean.parseBoolean(readString(NORMS.length, scratch));
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), DOCVALUES);
            String dvType = readString(DOCVALUES.length, scratch);
            final DocValuesType docValuesType = docValuesType(dvType);
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), DOCVALUES_GEN);
            final long dvGen = Long.parseLong(readString(DOCVALUES_GEN.length, scratch));
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), NUM_ATTS);
            int numAtts = Integer.parseInt(readString(NUM_ATTS.length, scratch));
            Map<String, String> atts = new HashMap<>();
            for (int j = 0; j < numAtts; j++) {
                SimpleTextUtil.readLine(input, scratch);
                assert StringHelper.startsWith(scratch.get(), ATT_KEY);
                String key = readString(ATT_KEY.length, scratch);
                SimpleTextUtil.readLine(input, scratch);
                assert StringHelper.startsWith(scratch.get(), ATT_VALUE);
                String value = readString(ATT_VALUE.length, scratch);
                atts.put(key, value);
            }
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), DIM_COUNT);
            int dimensionalCount = Integer.parseInt(readString(DIM_COUNT.length, scratch));
            SimpleTextUtil.readLine(input, scratch);
            assert StringHelper.startsWith(scratch.get(), DIM_NUM_BYTES);
            int dimensionalNumBytes = Integer.parseInt(readString(DIM_NUM_BYTES.length, scratch));
            infos[i] = new FieldInfo(name, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValuesType, dvGen, Collections.unmodifiableMap(atts), dimensionalCount, dimensionalNumBytes);
        }
        SimpleTextUtil.checkFooter(input);
        FieldInfos fieldInfos = new FieldInfos(infos);
        success = true;
        return fieldInfos;
    } finally {
        if (success) {
            input.close();
        } else {
            IOUtils.closeWhileHandlingException(input);
        }
    }
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) ChecksumIndexInput(org.apache.lucene.store.ChecksumIndexInput) IndexOptions(org.apache.lucene.index.IndexOptions) HashMap(java.util.HashMap) FieldInfos(org.apache.lucene.index.FieldInfos) DocValuesType(org.apache.lucene.index.DocValuesType) FieldInfo(org.apache.lucene.index.FieldInfo)
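
The docValuesType(String) helper that turns the text token back into the enum is not shown either. Because the SimpleText format writes the enum name as plain text, the reverse mapping is presumably just a valueOf call in the same class; this is an assumption about the helper, not a verbatim copy.

// Presumed reverse mapping for the plain-text doc-values token read above;
// SimpleText writes the enum's name, so DocValuesType.valueOf should round-trip it.
public DocValuesType docValuesType(String dvType) {
    return DocValuesType.valueOf(dvType);
}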

Aggregations

DocValuesType (org.apache.lucene.index.DocValuesType): 9
FieldInfo (org.apache.lucene.index.FieldInfo): 5
FieldInfos (org.apache.lucene.index.FieldInfos): 4
Document (org.apache.lucene.document.Document): 3
NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField): 3
SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField): 3
IndexOptions (org.apache.lucene.index.IndexOptions): 3
LeafReader (org.apache.lucene.index.LeafReader): 3
LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 3
NumericDocValues (org.apache.lucene.index.NumericDocValues): 3
IndexSearcher (org.apache.lucene.search.IndexSearcher): 3
BytesRef (org.apache.lucene.util.BytesRef): 3
ArrayList (java.util.ArrayList): 2
HashMap (java.util.HashMap): 2
MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 2
BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField): 2
Field (org.apache.lucene.document.Field): 2
BinaryDocValues (org.apache.lucene.index.BinaryDocValues): 2
CorruptIndexException (org.apache.lucene.index.CorruptIndexException): 2
RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 2