Use of com.linkedin.pinot.core.segment.creator.ForwardIndexCreator in project pinot by linkedin.
The init method of the class SegmentColumnarIndexCreator.
/**
 * Prepares this creator for building a segment.
 *
 * <p>Resets the per-segment state, creates the output directory, copies the document statistics
 * from {@code segmentIndexCreationInfo}, and then sets up, per column, a dictionary creator (when
 * {@code createDictionaryForColumn} says one is needed), a forward index creator, and, for the
 * columns listed in the config, an inverted index creator.
 *
 * @param segmentCreationSpec segment generation config; supplies the padding character and the
 *     raw-index and inverted-index column lists
 * @param segmentIndexCreationInfo segment-level statistics (doc, error, null and conversion counts)
 * @param indexCreationInfoMap per-column index creation info, keyed by column name
 * @param schema schema whose field specs drive dictionary and index creation
 * @param outDir directory the segment files are written to; must not exist yet
 * @throws RuntimeException if {@code outDir} already exists or cannot be created, or if a raw
 *     index is requested for a multi-valued column
 * @throws Exception any failure raised while constructing or building the underlying creators
 */
@Override
public void init(SegmentGeneratorConfig segmentCreationSpec, SegmentIndexCreationInfo segmentIndexCreationInfo,
    Map<String, ColumnIndexCreationInfo> indexCreationInfoMap, Schema schema, File outDir) throws Exception {
  docIdCounter = 0;
  config = segmentCreationSpec;
  this.indexCreationInfoMap = indexCreationInfoMap;
  dictionaryCreatorMap = new HashMap<String, SegmentDictionaryCreator>();
  forwardIndexCreatorMap = new HashMap<String, ForwardIndexCreator>();
  invertedIndexCreatorMap = new HashMap<String, InvertedIndexCreator>();
  file = outDir;

  // Check that the output directory does not exist
  if (file.exists()) {
    throw new RuntimeException("Segment output directory " + file.getAbsolutePath() + " already exists.");
  }
  // Fail fast when the directory cannot be created instead of letting a later creator
  // trip over the missing directory with a less helpful error.
  if (!file.mkdir()) {
    throw new RuntimeException("Failed to create segment output directory " + file.getAbsolutePath());
  }

  this.schema = schema;
  this.totalDocs = segmentIndexCreationInfo.getTotalDocs();
  this.totalAggDocs = segmentIndexCreationInfo.getTotalAggDocs();
  this.totalRawDocs = segmentIndexCreationInfo.getTotalRawDocs();
  this.totalErrors = segmentIndexCreationInfo.getTotalErrors();
  this.totalNulls = segmentIndexCreationInfo.getTotalNulls();
  this.totalConversions = segmentIndexCreationInfo.getTotalConversions();
  this.totalNullCols = segmentIndexCreationInfo.getTotalNullCols();
  this.paddingCharacter = segmentCreationSpec.getPaddingCharacter();

  // Create a dictionary creator for every schema column that needs one
  for (final FieldSpec spec : schema.getAllFieldSpecs()) {
    final String column = spec.getName();
    final ColumnIndexCreationInfo info = indexCreationInfoMap.get(column);
    if (createDictionaryForColumn(info, config, spec)) {
      dictionaryCreatorMap.put(column,
          new SegmentDictionaryCreator(info.hasNulls(), info.getSortedUniqueElementsArray(), spec, file,
              paddingCharacter));
    }
  }

  // For each column, build its dictionary (if any) and initialize its forward index.
  // Inverted indexes are set up separately below.
  for (final Map.Entry<String, ColumnIndexCreationInfo> entry : indexCreationInfoMap.entrySet()) {
    final String column = entry.getKey();
    final ColumnIndexCreationInfo indexCreationInfo = entry.getValue();

    // The sorted flag is handed to build() in a one-element array and written back afterwards,
    // presumably so that build() can revise it.
    final boolean[] isSorted = {indexCreationInfo.isSorted()};
    final SegmentDictionaryCreator dictionaryCreator = dictionaryCreatorMap.get(column);
    if (dictionaryCreator != null) {
      dictionaryCreator.build(isSorted);
      indexCreationInfo.setSorted(isSorted[0]);
      dictionaryCache.put(column, new HashMap<Object, Object>());
    }

    final int uniqueValueCount = indexCreationInfo.getDistinctValueCount();
    final int maxLength = indexCreationInfo.getLegnthOfLongestEntry();
    final boolean buildRawIndex = config.getRawIndexCreationColumns().contains(column);
    final FieldSpec fieldSpec = schema.getFieldSpecFor(column);

    if (fieldSpec.isSingleValueField()) {
      if (buildRawIndex) {
        // Raw indexes store actual values, instead of dictionary ids.
        forwardIndexCreatorMap.put(column,
            getRawIndexCreatorForColumn(file, column, fieldSpec.getDataType(), totalDocs, maxLength));
      } else if (indexCreationInfo.isSorted()) {
        forwardIndexCreatorMap.put(column,
            new SingleValueSortedForwardIndexCreator(file, uniqueValueCount, fieldSpec));
      } else {
        forwardIndexCreatorMap.put(column,
            new SingleValueUnsortedForwardIndexCreator(fieldSpec, file, uniqueValueCount, totalDocs,
                indexCreationInfo.getTotalNumberOfEntries(), indexCreationInfo.hasNulls()));
      }
    } else {
      if (buildRawIndex) {
        // TODO: Add support for multi-valued columns.
        throw new RuntimeException("Raw index generation not supported for multi-valued columns: " + column);
      }
      forwardIndexCreatorMap.put(column,
          new MultiValueUnsortedForwardIndexCreator(fieldSpec, file, uniqueValueCount, totalDocs,
              indexCreationInfo.getTotalNumberOfEntries(), indexCreationInfo.hasNulls()));
    }
  }

  // Initialize an inverted index creator for every configured column that exists in the schema
  for (String column : config.getInvertedIndexCreationColumns()) {
    if (!schema.hasColumn(column)) {
      LOGGER.warn("Skipping enabling index on column:{} since its missing in schema", column);
      continue;
    }
    final ColumnIndexCreationInfo indexCreationInfo = indexCreationInfoMap.get(column);
    final int uniqueValueCount = indexCreationInfo.getDistinctValueCount();
    invertedIndexCreatorMap.put(column,
        new OffHeapBitmapInvertedIndexCreator(file, uniqueValueCount, totalDocs,
            indexCreationInfo.getTotalNumberOfEntries(), schema.getFieldSpecFor(column)));
  }
}
Aggregations