Use of org.apache.lucene.index.FieldInfos in project lucene-solr by apache.
The class UninvertDocValuesMergePolicyTest, method testIndexAndAddDocValues.
public void testIndexAndAddDocValues() throws Exception {
  Random rand = random();
  for (int i = 0; i < 100; i++) {
    assertU(adoc(ID_FIELD, String.valueOf(i), TEST_FIELD, String.valueOf(i)));
    if (rand.nextBoolean()) {
      assertU(commit());
    }
  }
  assertU(commit());
  // Assert everything has been indexed and there are no docvalues
  withNewRawReader(h, topReader -> {
    assertEquals(100, topReader.numDocs());
    final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
    assertEquals(DocValuesType.NONE, infos.fieldInfo(TEST_FIELD).getDocValuesType());
  });
  addDocValuesTo(h, TEST_FIELD);
  // Add some more documents with doc values turned on, including updates to some existing ones
  for (int i = 90; i < 110; i++) {
    assertU(adoc(ID_FIELD, String.valueOf(i), TEST_FIELD, String.valueOf(i)));
    if (rand.nextBoolean()) {
      assertU(commit());
    }
  }
  assertU(commit());
  withNewRawReader(h, topReader -> {
    assertEquals(110, topReader.numDocs());
    final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
    assertEquals(DocValuesType.SORTED, infos.fieldInfo(TEST_FIELD).getDocValuesType());
  });
  int optimizeSegments = 1;
  assertU(optimize("maxSegments", String.valueOf(optimizeSegments)));
  // Assert all docs have the right docvalues
  withNewRawReader(h, topReader -> {
    assertEquals(110, topReader.numDocs());
    assertEquals(optimizeSegments, topReader.leaves().size());
    final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
    assertEquals(DocValuesType.SORTED, infos.fieldInfo(TEST_FIELD).getDocValuesType());
    for (LeafReaderContext ctx : topReader.leaves()) {
      LeafReader r = ctx.reader();
      SortedDocValues docvalues = r.getSortedDocValues(TEST_FIELD);
      for (int i = 0; i < r.numDocs(); ++i) {
        Document doc = r.document(i);
        String v = doc.getField(TEST_FIELD).stringValue();
        String id = doc.getField(ID_FIELD).stringValue();
        assertEquals(DocValuesType.SORTED, r.getFieldInfos().fieldInfo(TEST_FIELD).getDocValuesType());
        assertEquals(DocValuesType.NONE, r.getFieldInfos().fieldInfo(ID_FIELD).getDocValuesType());
        assertEquals(v, id);
        docvalues.nextDoc();
        assertEquals(v, docvalues.binaryValue().utf8ToString());
      }
    }
  });
}
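For reference, the FieldInfos lookup these assertions rely on can be isolated into a small sketch (the docValuesTypeOf helper below is illustrative, not part of the test): MultiFields.getMergedFieldInfos merges the per-segment FieldInfos of a top-level reader, and fieldInfo(name) returns null for a field that never appeared in the index.

import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;

// Hypothetical helper: report the doc-values type recorded for a field.
static DocValuesType docValuesTypeOf(IndexReader topReader, String fieldName) {
  // merge the per-segment FieldInfos into one view of the whole index
  FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
  FieldInfo info = infos.fieldInfo(fieldName);
  // fieldInfo(...) returns null when the field was never indexed
  return info == null ? DocValuesType.NONE : info.getDocValuesType();
}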
Use of org.apache.lucene.index.FieldInfos in project lucene-solr by apache.
The class Lucene60PointsWriter, method merge.
@Override
public void merge(MergeState mergeState) throws IOException {
  /**
   * If indexSort is activated and some of the leaves are not sorted the next test will catch that and the non-optimized merge will run.
   * If the readers are all sorted then it's safe to perform a bulk merge of the points.
   **/
  for (PointsReader reader : mergeState.pointsReaders) {
    if (reader instanceof Lucene60PointsReader == false) {
      // We can only bulk merge when all to-be-merged segments use our format:
      super.merge(mergeState);
      return;
    }
  }
  for (PointsReader reader : mergeState.pointsReaders) {
    if (reader != null) {
      reader.checkIntegrity();
    }
  }
  for (FieldInfo fieldInfo : mergeState.mergeFieldInfos) {
    if (fieldInfo.getPointDimensionCount() != 0) {
      if (fieldInfo.getPointDimensionCount() == 1) {
        boolean singleValuePerDoc = true;
        // Worst case total maximum size (if none of the points are deleted):
        long totMaxSize = 0;
        for (int i = 0; i < mergeState.pointsReaders.length; i++) {
          PointsReader reader = mergeState.pointsReaders[i];
          if (reader != null) {
            FieldInfos readerFieldInfos = mergeState.fieldInfos[i];
            FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
            if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
              PointValues values = reader.getValues(fieldInfo.name);
              if (values != null) {
                totMaxSize += values.size();
                singleValuePerDoc &= values.size() == values.getDocCount();
              }
            }
          }
        }
        // Optimize the 1D case to use BKDWriter.merge, which does a single merge sort of the
        // already sorted incoming segments, instead of trying to sort all points again as if
        // we were simply reindexing them:
        try (BKDWriter writer = new BKDWriter(writeState.segmentInfo.maxDoc(), writeState.directory, writeState.segmentInfo.name, fieldInfo.getPointDimensionCount(), fieldInfo.getPointNumBytes(), maxPointsInLeafNode, maxMBSortInHeap, totMaxSize, singleValuePerDoc)) {
          List<BKDReader> bkdReaders = new ArrayList<>();
          List<MergeState.DocMap> docMaps = new ArrayList<>();
          for (int i = 0; i < mergeState.pointsReaders.length; i++) {
            PointsReader reader = mergeState.pointsReaders[i];
            if (reader != null) {
              // we confirmed this up above
              assert reader instanceof Lucene60PointsReader;
              Lucene60PointsReader reader60 = (Lucene60PointsReader) reader;
              // NOTE: we cannot just use the merged fieldInfo.number (instead of resolving to this
              // reader's FieldInfo as we do below) because field numbers can easily be different
              // when addIndexes(Directory...) copies over segments from another index:
              FieldInfos readerFieldInfos = mergeState.fieldInfos[i];
              FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
              if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
                BKDReader bkdReader = reader60.readers.get(readerFieldInfo.number);
                if (bkdReader != null) {
                  bkdReaders.add(bkdReader);
                  docMaps.add(mergeState.docMaps[i]);
                }
              }
            }
          }
          long fp = writer.merge(dataOut, docMaps, bkdReaders);
          if (fp != -1) {
            indexFPs.put(fieldInfo.name, fp);
          }
        }
      } else {
        mergeOneField(mergeState, fieldInfo);
      }
    }
  }
  finish();
}
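The FieldInfos detail worth highlighting above is the per-reader resolution step: the merged FieldInfo is mapped back to each incoming segment by field name, never by number. A condensed sketch of just that idiom, reusing the variable names from the merge loop above:

for (int i = 0; i < mergeState.fieldInfos.length; i++) {
  // look the field up by NAME in this segment's own FieldInfos; the
  // per-reader field NUMBER may differ from the merged fieldInfo.number
  // when segments were copied in via addIndexes(Directory...)
  FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(fieldInfo.name);
  if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
    // only readerFieldInfo.number is valid against this reader's data structures
  }
}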
Use of org.apache.lucene.index.FieldInfos in project lucene-solr by apache.
The class Lucene60FieldInfosFormat, method read.
@Override
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext context) throws IOException {
  final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, EXTENSION);
  try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) {
    Throwable priorE = null;
    FieldInfo[] infos = null;
    try {
      CodecUtil.checkIndexHeader(input, Lucene60FieldInfosFormat.CODEC_NAME, Lucene60FieldInfosFormat.FORMAT_START, Lucene60FieldInfosFormat.FORMAT_CURRENT, segmentInfo.getId(), segmentSuffix);
      // read in the size
      final int size = input.readVInt();
      infos = new FieldInfo[size];
      // previous field's attribute map, we share when possible:
      Map<String, String> lastAttributes = Collections.emptyMap();
      for (int i = 0; i < size; i++) {
        String name = input.readString();
        final int fieldNumber = input.readVInt();
        if (fieldNumber < 0) {
          throw new CorruptIndexException("invalid field number for field: " + name + ", fieldNumber=" + fieldNumber, input);
        }
        byte bits = input.readByte();
        boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
        boolean omitNorms = (bits & OMIT_NORMS) != 0;
        boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
        final IndexOptions indexOptions = getIndexOptions(input, input.readByte());
        // DV types are packed in one byte
        final DocValuesType docValuesType = getDocValuesType(input, input.readByte());
        final long dvGen = input.readLong();
        Map<String, String> attributes = input.readMapOfStrings();
        // just reuse the last field's map if it's the same
        if (attributes.equals(lastAttributes)) {
          attributes = lastAttributes;
        }
        lastAttributes = attributes;
        int pointDimensionCount = input.readVInt();
        int pointNumBytes;
        if (pointDimensionCount != 0) {
          pointNumBytes = input.readVInt();
        } else {
          pointNumBytes = 0;
        }
        try {
          infos[i] = new FieldInfo(name, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValuesType, dvGen, attributes, pointDimensionCount, pointNumBytes);
          infos[i].checkConsistency();
        } catch (IllegalStateException e) {
          throw new CorruptIndexException("invalid fieldinfo for field: " + name + ", fieldNumber=" + fieldNumber, input, e);
        }
      }
    } catch (Throwable exception) {
      priorE = exception;
    } finally {
      CodecUtil.checkFooter(input, priorE);
    }
    return new FieldInfos(infos);
  }
}
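On the consuming side, the returned FieldInfos supports lookups by field name as well as by field number. A minimal sketch, assuming a directory, a segmentInfo, and a field called "title" purely for illustration:

FieldInfos infos = new Lucene60FieldInfosFormat().read(directory, segmentInfo, "", IOContext.READONCE);
FieldInfo byName = infos.fieldInfo("title"); // null if no such field exists
FieldInfo byNumber = infos.fieldInfo(0);     // null if no field has that number
if (byName != null) {
  System.out.println(byName.name + " -> " + byName.getDocValuesType());
}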
Use of org.apache.lucene.index.FieldInfos in project lucene-solr by apache.
The class FieldValueQuery, method createWeight.
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
  return new ConstantScoreWeight(this, boost) {
    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      FieldInfos fieldInfos = context.reader().getFieldInfos();
      FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
      if (fieldInfo == null) {
        return null;
      }
      DocValuesType dvType = fieldInfo.getDocValuesType();
      LeafReader reader = context.reader();
      DocIdSetIterator iterator;
      switch (dvType) {
        case NONE:
          return null;
        case NUMERIC:
          iterator = reader.getNumericDocValues(field);
          break;
        case BINARY:
          iterator = reader.getBinaryDocValues(field);
          break;
        case SORTED:
          iterator = reader.getSortedDocValues(field);
          break;
        case SORTED_NUMERIC:
          iterator = reader.getSortedNumericDocValues(field);
          break;
        case SORTED_SET:
          iterator = reader.getSortedSetDocValues(field);
          break;
        default:
          throw new AssertionError();
      }
      return new ConstantScoreScorer(this, score(), iterator);
    }
  };
}
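From the calling side, the weight above is reached through an ordinary search; a minimal sketch follows (the "price" field name is illustrative). FieldValueQuery matches every document that has a doc value for the given field and assigns a constant score:

IndexSearcher searcher = new IndexSearcher(reader);
// matches all documents carrying any doc value for "price"
Query query = new FieldValueQuery("price");
TopDocs hits = searcher.search(query, 10);
System.out.println(hits.totalHits + " documents have a value for the field");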
Use of org.apache.lucene.index.FieldInfos in project stanbol by apache.
The class FstConfig, method buildConfig.
/**
 * Inspects the SolrCore to get the defined languages for the configured
 * {@link #indexField} and {@link #storeField}. Initialises the
 * {@link #getCorpusCreationInfos()}.
 * @param schema the schema of the SolrCore
 * @param indexReader the index reader of the SolrCore
 */
public void buildConfig(IndexSchema schema, AtomicReader indexReader) {
  // we need this twice
  FieldInfos fieldInfos = indexReader.getFieldInfos();
  String fieldWildcard = encodeLanguage(indexField, "*");
  for (FieldInfo fieldInfo : fieldInfos) {
    // try to match the field names against the wildcard
    if (FilenameUtils.wildcardMatch(fieldInfo.name, fieldWildcard)) {
      // for matches, parse the language from the field name
      String language = parseLanguage(fieldInfo.name, indexField);
      if (language != null) {
        // generate the FST file name
        StringBuilder fstFileName = new StringBuilder(fstName);
        if (!language.isEmpty()) {
          fstFileName.append('.').append(language);
        }
        fstFileName.append(".fst");
        File fstFile = new File(fstDirectory, fstFileName.toString());
        // get the FieldType of the field from the Solr schema
        FieldType fieldType = schema.getFieldTypeNoEx(fieldInfo.name);
        if (fieldType != null) {
          // if the fieldType is present, we also need to check that the
          // stored field holding the labels is present
          String storeFieldName;
          if (storeField == null) {
            // storeField == indexField
            storeFieldName = fieldInfo.name;
          } else {
            // check that the storeField is present in the index
            storeFieldName = encodeLanguage(storeField, language);
            FieldInfo storedFieldInfos = fieldInfos.fieldInfo(storeFieldName);
            if (storedFieldInfos == null) {
              log.warn(" ... ignore language {} because Stored Field {} " + "for IndexField {} does not exist! ", new Object[] { language, storeFieldName, fieldInfo.name });
              storeFieldName = null;
            }
          }
          if (storeFieldName != null) {
            // == valid configuration
            CorpusCreationInfo fstInfo = new CorpusCreationInfo(language, fieldInfo.name, storeFieldName, fieldType, fstFile);
            log.info(" ... init {} ", fstInfo);
            addCorpus(fstInfo);
          }
        } else {
          log.warn(" ... ignore language {} because of unknown fieldtype " + "for SolrField {}", language, fieldInfo.name);
        }
      }
      // else the field matched the wildcard, but has not passed the encoding test
    }
    // else the Solr field does not match the field definition in the config
  }
  // end iterate over all fields in the SolrIndex
}
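The FieldInfos access pattern buildConfig depends on is that FieldInfos is Iterable<FieldInfo>, so per-field metadata can be scanned directly. A minimal sketch, with an illustrative "title_*" wildcard standing in for the encoded language fields:

FieldInfos fieldInfos = indexReader.getFieldInfos();
for (FieldInfo fieldInfo : fieldInfos) {
  // match field names against a wildcard, as buildConfig does per language
  if (FilenameUtils.wildcardMatch(fieldInfo.name, "title_*")) {
    System.out.println(fieldInfo.name + " docValues=" + fieldInfo.getDocValuesType());
  }
}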