
Example 11 with FieldInfos

use of org.apache.lucene.index.FieldInfos in project lucene-solr by apache.

the class UninvertDocValuesMergePolicyTest method testIndexAndAddDocValues.

public void testIndexAndAddDocValues() throws Exception {
    Random rand = random();
    for (int i = 0; i < 100; i++) {
        assertU(adoc(ID_FIELD, String.valueOf(i), TEST_FIELD, String.valueOf(i)));
        if (rand.nextBoolean()) {
            assertU(commit());
        }
    }
    assertU(commit());
    // Assert everything has been indexed and there are no docvalues
    withNewRawReader(h, topReader -> {
        assertEquals(100, topReader.numDocs());
        final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
        assertEquals(DocValuesType.NONE, infos.fieldInfo(TEST_FIELD).getDocValuesType());
    });
    addDocValuesTo(h, TEST_FIELD);
    // Add some more documents with doc values turned on including updating some
    for (int i = 90; i < 110; i++) {
        assertU(adoc(ID_FIELD, String.valueOf(i), TEST_FIELD, String.valueOf(i)));
        if (rand.nextBoolean()) {
            assertU(commit());
        }
    }
    assertU(commit());
    withNewRawReader(h, topReader -> {
        assertEquals(110, topReader.numDocs());
        final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
        assertEquals(DocValuesType.SORTED, infos.fieldInfo(TEST_FIELD).getDocValuesType());
    });
    int optimizeSegments = 1;
    assertU(optimize("maxSegments", String.valueOf(optimizeSegments)));
    // Assert all docs have the right docvalues
    withNewRawReader(h, topReader -> {
        assertEquals(110, topReader.numDocs());
        assertEquals(optimizeSegments, topReader.leaves().size());
        final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
        assertEquals(DocValuesType.SORTED, infos.fieldInfo(TEST_FIELD).getDocValuesType());
        for (LeafReaderContext ctx : topReader.leaves()) {
            LeafReader r = ctx.reader();
            SortedDocValues docvalues = r.getSortedDocValues(TEST_FIELD);
            // After forceMerge there are no deletions, so doc ids run 0..numDocs()-1 and
            // every doc has a TEST_FIELD value, keeping the docvalues iterator in step.
            for (int i = 0; i < r.numDocs(); ++i) {
                Document doc = r.document(i);
                String v = doc.getField(TEST_FIELD).stringValue();
                String id = doc.getField(ID_FIELD).stringValue();
                assertEquals(DocValuesType.SORTED, r.getFieldInfos().fieldInfo(TEST_FIELD).getDocValuesType());
                assertEquals(DocValuesType.NONE, r.getFieldInfos().fieldInfo(ID_FIELD).getDocValuesType());
                assertEquals(v, id);
                docvalues.nextDoc();
                assertEquals(v, docvalues.binaryValue().utf8ToString());
            }
        }
    });
}
Also used: FieldInfos(org.apache.lucene.index.FieldInfos) Random(java.util.Random) LeafReader(org.apache.lucene.index.LeafReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Document(org.apache.lucene.document.Document) SortedDocValues(org.apache.lucene.index.SortedDocValues)
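
The pattern this test relies on, merging per-segment FieldInfos and checking a field's DocValuesType, also works outside the Solr test harness. A minimal standalone sketch (the index path and field name are placeholders; MultiFields.getMergedFieldInfos matches the Lucene 6.x API used above):

import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class DocValuesTypeCheck {
    public static void main(String[] args) throws Exception {
        // Placeholder path and field name; substitute your own index.
        try (Directory dir = FSDirectory.open(Paths.get("/path/to/index"));
             DirectoryReader reader = DirectoryReader.open(dir)) {
            // Merge the per-segment FieldInfos into one view, as the test does.
            FieldInfos infos = MultiFields.getMergedFieldInfos(reader);
            FieldInfo info = infos.fieldInfo("my_field");
            if (info == null) {
                System.out.println("field not present in any segment");
            } else {
                // DocValuesType.NONE means no segment has doc values for the field.
                System.out.println("doc values type: " + info.getDocValuesType());
            }
        }
    }
}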

Example 12 with FieldInfos

use of org.apache.lucene.index.FieldInfos in project lucene-solr by apache.

the class Lucene60PointsWriter method merge.

@Override
public void merge(MergeState mergeState) throws IOException {
    /*
     * If indexSort is activated and some of the leaves are not sorted, the next check
     * will catch that and the non-optimized merge will run. If the readers are all
     * sorted then it's safe to perform a bulk merge of the points.
     */
    for (PointsReader reader : mergeState.pointsReaders) {
        if (reader instanceof Lucene60PointsReader == false) {
            // We can only bulk merge when all to-be-merged segments use our format:
            super.merge(mergeState);
            return;
        }
    }
    for (PointsReader reader : mergeState.pointsReaders) {
        if (reader != null) {
            reader.checkIntegrity();
        }
    }
    for (FieldInfo fieldInfo : mergeState.mergeFieldInfos) {
        if (fieldInfo.getPointDimensionCount() != 0) {
            if (fieldInfo.getPointDimensionCount() == 1) {
                boolean singleValuePerDoc = true;
                // Worst case total maximum size (if none of the points are deleted):
                long totMaxSize = 0;
                for (int i = 0; i < mergeState.pointsReaders.length; i++) {
                    PointsReader reader = mergeState.pointsReaders[i];
                    if (reader != null) {
                        FieldInfos readerFieldInfos = mergeState.fieldInfos[i];
                        FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
                        if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
                            PointValues values = reader.getValues(fieldInfo.name);
                            if (values != null) {
                                totMaxSize += values.size();
                                singleValuePerDoc &= values.size() == values.getDocCount();
                            }
                        }
                    }
                }
                // Optimize the 1D case to use BKDWriter.merge, which does a single merge sort
                // of the already sorted incoming segments, instead of trying to sort all points
                // again as if we were simply reindexing them:
                try (BKDWriter writer = new BKDWriter(writeState.segmentInfo.maxDoc(), writeState.directory, writeState.segmentInfo.name, fieldInfo.getPointDimensionCount(), fieldInfo.getPointNumBytes(), maxPointsInLeafNode, maxMBSortInHeap, totMaxSize, singleValuePerDoc)) {
                    List<BKDReader> bkdReaders = new ArrayList<>();
                    List<MergeState.DocMap> docMaps = new ArrayList<>();
                    for (int i = 0; i < mergeState.pointsReaders.length; i++) {
                        PointsReader reader = mergeState.pointsReaders[i];
                        if (reader != null) {
                            // we confirmed this up above
                            assert reader instanceof Lucene60PointsReader;
                            Lucene60PointsReader reader60 = (Lucene60PointsReader) reader;
                            // NOTE: we cannot just use the merged fieldInfo.number (instead of resolving to this
                            // reader's FieldInfo as we do below) because field numbers can easily be different
                            // when addIndexes(Directory...) copies over segments from another index:
                            FieldInfos readerFieldInfos = mergeState.fieldInfos[i];
                            FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
                            if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
                                BKDReader bkdReader = reader60.readers.get(readerFieldInfo.number);
                                if (bkdReader != null) {
                                    bkdReaders.add(bkdReader);
                                    docMaps.add(mergeState.docMaps[i]);
                                }
                            }
                        }
                    }
                    long fp = writer.merge(dataOut, docMaps, bkdReaders);
                    if (fp != -1) {
                        indexFPs.put(fieldInfo.name, fp);
                    }
                }
            } else {
                mergeOneField(mergeState, fieldInfo);
            }
        }
    }
    finish();
}
Also used: ArrayList(java.util.ArrayList) FieldInfos(org.apache.lucene.index.FieldInfos) BKDReader(org.apache.lucene.util.bkd.BKDReader) MutablePointValues(org.apache.lucene.codecs.MutablePointValues) PointValues(org.apache.lucene.index.PointValues) PointsReader(org.apache.lucene.codecs.PointsReader) BKDWriter(org.apache.lucene.util.bkd.BKDWriter) FieldInfo(org.apache.lucene.index.FieldInfo)
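
The NOTE inside the loop is the subtle part: a field's number is stable only within a single segment, so cross-segment code must resolve FieldInfo by name, never by the merged number. A minimal sketch of that per-segment resolution (the reader and field name are hypothetical):

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReaderContext;

public class PerSegmentFieldNumbers {
    // Prints each segment's field number for one field name, illustrating why
    // Lucene60PointsWriter.merge resolves FieldInfo per reader instead of
    // reusing the merged fieldInfo.number.
    static void dumpFieldNumbers(DirectoryReader reader, String fieldName) {
        for (LeafReaderContext ctx : reader.leaves()) {
            // Look the field up by name in this segment's own FieldInfos.
            FieldInfo info = ctx.reader().getFieldInfos().fieldInfo(fieldName);
            if (info != null) {
                // The same name can map to different numbers per segment,
                // e.g. after addIndexes(Directory...) copies foreign segments.
                System.out.println("segment " + ctx.ord + ": number=" + info.number);
            }
        }
    }
}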

Example 13 with FieldInfos

use of org.apache.lucene.index.FieldInfos in project lucene-solr by apache.

the class Lucene60FieldInfosFormat method read.

@Override
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext context) throws IOException {
    final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, EXTENSION);
    try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) {
        Throwable priorE = null;
        FieldInfo[] infos = null;
        try {
            CodecUtil.checkIndexHeader(input, Lucene60FieldInfosFormat.CODEC_NAME, Lucene60FieldInfosFormat.FORMAT_START, Lucene60FieldInfosFormat.FORMAT_CURRENT, segmentInfo.getId(), segmentSuffix);
            //read in the size
            final int size = input.readVInt();
            infos = new FieldInfo[size];
            // previous field's attribute map, we share when possible:
            Map<String, String> lastAttributes = Collections.emptyMap();
            for (int i = 0; i < size; i++) {
                String name = input.readString();
                final int fieldNumber = input.readVInt();
                if (fieldNumber < 0) {
                    throw new CorruptIndexException("invalid field number for field: " + name + ", fieldNumber=" + fieldNumber, input);
                }
                byte bits = input.readByte();
                boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
                boolean omitNorms = (bits & OMIT_NORMS) != 0;
                boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
                final IndexOptions indexOptions = getIndexOptions(input, input.readByte());
                // DV Types are packed in one byte
                final DocValuesType docValuesType = getDocValuesType(input, input.readByte());
                final long dvGen = input.readLong();
                Map<String, String> attributes = input.readMapOfStrings();
                // just use the last field's map if it's the same
                if (attributes.equals(lastAttributes)) {
                    attributes = lastAttributes;
                }
                lastAttributes = attributes;
                int pointDimensionCount = input.readVInt();
                int pointNumBytes;
                if (pointDimensionCount != 0) {
                    pointNumBytes = input.readVInt();
                } else {
                    pointNumBytes = 0;
                }
                try {
                    infos[i] = new FieldInfo(name, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValuesType, dvGen, attributes, pointDimensionCount, pointNumBytes);
                    infos[i].checkConsistency();
                } catch (IllegalStateException e) {
                    throw new CorruptIndexException("invalid fieldinfo for field: " + name + ", fieldNumber=" + fieldNumber, input, e);
                }
            }
        } catch (Throwable exception) {
            priorE = exception;
        } finally {
            CodecUtil.checkFooter(input, priorE);
        }
        return new FieldInfos(infos);
    }
}
Also used: ChecksumIndexInput(org.apache.lucene.store.ChecksumIndexInput) IndexOptions(org.apache.lucene.index.IndexOptions) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) FieldInfos(org.apache.lucene.index.FieldInfos) DocValuesType(org.apache.lucene.index.DocValuesType) FieldInfo(org.apache.lucene.index.FieldInfo)
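
The reader unpacks three booleans from one byte using the STORE_TERMVECTOR, OMIT_NORMS, and STORE_PAYLOADS masks. A small round-trip sketch of that packing scheme; the mask values (0x1, 0x2, 0x4) mirror Lucene60FieldInfosFormat's private constants and should be treated as an assumption for other codec versions:

public class FieldBitsRoundTrip {
    // Assumed to match Lucene60FieldInfosFormat's private constants.
    static final byte STORE_TERMVECTOR = 0x1;
    static final byte OMIT_NORMS = 0x2;
    static final byte STORE_PAYLOADS = 0x4;

    static byte pack(boolean termVectors, boolean omitNorms, boolean payloads) {
        byte bits = 0;
        if (termVectors) bits |= STORE_TERMVECTOR;
        if (omitNorms) bits |= OMIT_NORMS;
        if (payloads) bits |= STORE_PAYLOADS;
        return bits;
    }

    public static void main(String[] args) {
        byte bits = pack(true, false, true);
        // Unpacking matches the reads in Lucene60FieldInfosFormat.read:
        System.out.println("termVectors=" + ((bits & STORE_TERMVECTOR) != 0));
        System.out.println("omitNorms=" + ((bits & OMIT_NORMS) != 0));
        System.out.println("payloads=" + ((bits & STORE_PAYLOADS) != 0));
    }
}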

Example 14 with FieldInfos

use of org.apache.lucene.index.FieldInfos in project lucene-solr by apache.

the class FieldValueQuery method createWeight.

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    return new ConstantScoreWeight(this, boost) {

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            FieldInfos fieldInfos = context.reader().getFieldInfos();
            FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
            if (fieldInfo == null) {
                return null;
            }
            DocValuesType dvType = fieldInfo.getDocValuesType();
            LeafReader reader = context.reader();
            DocIdSetIterator iterator;
            switch(dvType) {
                case NONE:
                    return null;
                case NUMERIC:
                    iterator = reader.getNumericDocValues(field);
                    break;
                case BINARY:
                    iterator = reader.getBinaryDocValues(field);
                    break;
                case SORTED:
                    iterator = reader.getSortedDocValues(field);
                    break;
                case SORTED_NUMERIC:
                    iterator = reader.getSortedNumericDocValues(field);
                    break;
                case SORTED_SET:
                    iterator = reader.getSortedSetDocValues(field);
                    break;
                default:
                    throw new AssertionError();
            }
            return new ConstantScoreScorer(this, score(), iterator);
        }
    };
}
Also used: FieldInfos(org.apache.lucene.index.FieldInfos) LeafReader(org.apache.lucene.index.LeafReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) DocValuesType(org.apache.lucene.index.DocValuesType) FieldInfo(org.apache.lucene.index.FieldInfo)
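
Because the scorer returns null both when the FieldInfo is missing and when dvType is NONE, the query silently matches nothing on segments without doc values for the field. A hedged usage sketch (index setup omitted; note that later Lucene versions superseded FieldValueQuery with DocValuesFieldExistsQuery):

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.FieldValueQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;

public class FieldValueQueryDemo {
    // Counts documents that have a doc value for the given field.
    static int countDocsWithField(Directory dir, String field) throws Exception {
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            // Segments where the field has DocValuesType.NONE contribute no hits.
            return searcher.count(new FieldValueQuery(field));
        }
    }
}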

Example 15 with FieldInfos

use of org.apache.lucene.index.FieldInfos in project stanbol by apache.

the class FstConfig method buildConfig.

/**
     * Inspects the SolrCore to get defined languages for the configured
     * {@link #indexField} and {@link #storeField}. Initialises the
     * {@link #getCorpusCreationInfos()}
     * @param schema the schema of the SolrCore
     * @param indexReader the index reader of the SolrCore
     */
public void buildConfig(IndexSchema schema, AtomicReader indexReader) {
    // we need the FieldInfos twice: to iterate all fields and to look up the stored field
    FieldInfos fieldInfos = indexReader.getFieldInfos();
    String fieldWildcard = encodeLanguage(indexField, "*");
    for (FieldInfo fieldInfo : fieldInfos) {
        //try to match the field names against the wildcard
        if (FilenameUtils.wildcardMatch(fieldInfo.name, fieldWildcard)) {
            //for matches parse the language from the field name
            String language = parseLanguage(fieldInfo.name, indexField);
            if (language != null) {
                //generate the FST file name
                StringBuilder fstFileName = new StringBuilder(fstName);
                if (!language.isEmpty()) {
                    fstFileName.append('.').append(language);
                }
                fstFileName.append(".fst");
                File fstFile = new File(fstDirectory, fstFileName.toString());
                //get the FieldType of the field from the Solr schema
                FieldType fieldType = schema.getFieldTypeNoEx(fieldInfo.name);
                if (fieldType != null) {
                    //if the fieldType is present
                    //we need also to check if the stored field with
                    //the labels is present
                    //get the stored Field and check if it is present!
                    String storeFieldName;
                    if (storeField == null) {
                        //storeField == indexField
                        storeFieldName = fieldInfo.name;
                    } else {
                        // check that the storeField is present in the index
                        storeFieldName = encodeLanguage(storeField, language);
                        FieldInfo storedFieldInfos = fieldInfos.fieldInfo(storeFieldName);
                        if (storedFieldInfos == null) {
                            log.warn(" ... ignore language {} because Stored Field {} " + "for IndexField {} does not exist! ", new Object[] { language, storeFieldName, fieldInfo.name });
                            storeFieldName = null;
                        }
                    }
                    if (storeFieldName != null) {
                        // == valid configuration
                        CorpusCreationInfo fstInfo = new CorpusCreationInfo(language, fieldInfo.name, storeFieldName, fieldType, fstFile);
                        log.info(" ... init {} ", fstInfo);
                        addCorpus(fstInfo);
                    }
                } else {
                    log.warn(" ... ignore language {} becuase unknown fieldtype " + "for SolrFied {}", language, fieldInfo.name);
                }
            }
        //else the field matched the wildcard, but has not passed the
        //encoding test.
        }
    //Solr field does not match the field definition in the config
    }
// end iterate over all fields in the SolrIndex        
}
Also used: FieldInfos(org.apache.lucene.index.FieldInfos) File(java.io.File) FieldInfo(org.apache.lucene.index.FieldInfo) FieldType(org.apache.solr.schema.FieldType)
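
The language-discovery idiom here is reusable on its own: FieldInfos is Iterable&lt;FieldInfo&gt;, so per-language fields can be found by matching names against a wildcard. A minimal sketch using commons-io's FilenameUtils; the "prefix_language" naming scheme below is a placeholder, not Stanbol's actual encoding:

import org.apache.commons.io.FilenameUtils;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.LeafReader;

public class LanguageFieldScan {
    // Lists the language suffixes of fields named like "label_en", "label_de", ...
    static void scan(LeafReader reader, String prefix) {
        FieldInfos infos = reader.getFieldInfos();
        String wildcard = prefix + "_*"; // placeholder encoding scheme
        for (FieldInfo info : infos) {
            if (FilenameUtils.wildcardMatch(info.name, wildcard)) {
                String language = info.name.substring(prefix.length() + 1);
                System.out.println(info.name + " -> language=" + language);
            }
        }
    }
}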

Aggregations

FieldInfos (org.apache.lucene.index.FieldInfos) 15
FieldInfo (org.apache.lucene.index.FieldInfo) 8
LeafReader (org.apache.lucene.index.LeafReader) 6
Document (org.apache.lucene.document.Document) 5
DocValuesType (org.apache.lucene.index.DocValuesType) 4
LeafReaderContext (org.apache.lucene.index.LeafReaderContext) 4
IndexOptions (org.apache.lucene.index.IndexOptions) 3
SortedDocValues (org.apache.lucene.index.SortedDocValues) 3
SolrCore (org.apache.solr.core.SolrCore) 3
SolrIndexSearcher (org.apache.solr.search.SolrIndexSearcher) 3
File (java.io.File) 2
ArrayList (java.util.ArrayList) 2
HashMap (java.util.HashMap) 2
CorruptIndexException (org.apache.lucene.index.CorruptIndexException) 2
ChecksumIndexInput (org.apache.lucene.store.ChecksumIndexInput) 2
AbstractIterator (com.google.common.collect.AbstractIterator) 1
IOException (java.io.IOException) 1
Date (java.util.Date) 1
Deque (java.util.Deque) 1
Random (java.util.Random) 1