Use of com.linkedin.pinot.core.segment.creator.impl.fwd.MultiValueUnsortedForwardIndexCreator in the pinot project by LinkedIn.
Class SegmentColumnarIndexCreator, method init.
/**
 * Initializes this creator for a new segment.
 *
 * <p>The method resets the document counter, stores the configuration, schema and segment-level
 * statistics, creates the output directory, and then sets up, per column:
 * <ul>
 *   <li>a dictionary creator for every schema field for which
 *       {@code createDictionaryForColumn} returns true (the dictionary is built right away),</li>
 *   <li>a forward index creator (raw, sorted single-value, unsorted single-value, or unsorted
 *       multi-value),</li>
 *   <li>an inverted index creator for every configured inverted-index column that is present in
 *       the schema.</li>
 * </ul>
 *
 * @param segmentCreationSpec segment generator configuration (raw/inverted index columns, padding character).
 * @param segmentIndexCreationInfo segment-level counters (total docs, errors, nulls, ...).
 * @param indexCreationInfoMap per-column index creation information, keyed by column name.
 * @param schema schema describing the columns of the segment.
 * @param outDir directory to write the segment into; it must not exist yet.
 * @throws RuntimeException if {@code outDir} already exists, or if a raw index is requested for a
 *         multi-valued column.
 * @throws IllegalStateException if {@code outDir} cannot be created.
 * @throws Exception if any of the underlying dictionary or index creators fails.
 */
@Override
public void init(SegmentGeneratorConfig segmentCreationSpec, SegmentIndexCreationInfo segmentIndexCreationInfo, Map<String, ColumnIndexCreationInfo> indexCreationInfoMap, Schema schema, File outDir) throws Exception {
  docIdCounter = 0;
  config = segmentCreationSpec;
  this.indexCreationInfoMap = indexCreationInfoMap;
  dictionaryCreatorMap = new HashMap<String, SegmentDictionaryCreator>();
  forwardIndexCreatorMap = new HashMap<String, ForwardIndexCreator>();
  invertedIndexCreatorMap = new HashMap<String, InvertedIndexCreator>();
  file = outDir;

  // Check that the output directory does not exist
  if (file.exists()) {
    throw new RuntimeException("Segment output directory " + file.getAbsolutePath() + " already exists.");
  }
  // Fail fast when the directory cannot be created; otherwise the failure would only show up
  // later as an unrelated I/O error from one of the dictionary/index creators.
  if (!file.mkdirs()) {
    throw new IllegalStateException("Failed to create segment output directory " + file.getAbsolutePath());
  }

  this.schema = schema;
  this.totalDocs = segmentIndexCreationInfo.getTotalDocs();
  this.totalAggDocs = segmentIndexCreationInfo.getTotalAggDocs();
  this.totalRawDocs = segmentIndexCreationInfo.getTotalRawDocs();
  this.totalErrors = segmentIndexCreationInfo.getTotalErrors();
  this.totalNulls = segmentIndexCreationInfo.getTotalNulls();
  this.totalConversions = segmentIndexCreationInfo.getTotalConversions();
  this.totalNullCols = segmentIndexCreationInfo.getTotalNullCols();
  this.paddingCharacter = segmentCreationSpec.getPaddingCharacter();

  // Initialize the dictionary creators for all columns that need a dictionary
  for (final FieldSpec spec : schema.getAllFieldSpecs()) {
    String column = spec.getName();
    final ColumnIndexCreationInfo info = indexCreationInfoMap.get(column);
    if (createDictionaryForColumn(info, config, spec)) {
      dictionaryCreatorMap.put(column, new SegmentDictionaryCreator(info.hasNulls(), info.getSortedUniqueElementsArray(), spec, file, paddingCharacter));
    }
  }

  // For each column, build its dictionary (if any) and initialize its forward index
  for (final Map.Entry<String, ColumnIndexCreationInfo> entry : indexCreationInfoMap.entrySet()) {
    final String column = entry.getKey();
    ColumnIndexCreationInfo indexCreationInfo = entry.getValue();

    // Single-element array used as an in/out parameter: build() may update the sorted flag.
    boolean[] isSorted = new boolean[1];
    isSorted[0] = indexCreationInfo.isSorted();
    SegmentDictionaryCreator dictionaryCreator = dictionaryCreatorMap.get(column);
    if (dictionaryCreator != null) {
      dictionaryCreator.build(isSorted);
      indexCreationInfo.setSorted(isSorted[0]);
      dictionaryCache.put(column, new HashMap<Object, Object>());
    }

    int uniqueValueCount = indexCreationInfo.getDistinctValueCount();
    int maxLength = indexCreationInfo.getLegnthOfLongestEntry();
    boolean buildRawIndex = config.getRawIndexCreationColumns().contains(column);
    FieldSpec fieldSpec = schema.getFieldSpecFor(column);

    if (fieldSpec.isSingleValueField()) {
      // Raw indexes store actual values, instead of dictionary ids.
      if (buildRawIndex) {
        forwardIndexCreatorMap.put(column, getRawIndexCreatorForColumn(file, column, fieldSpec.getDataType(), totalDocs, maxLength));
      } else if (indexCreationInfo.isSorted()) {
        forwardIndexCreatorMap.put(column, new SingleValueSortedForwardIndexCreator(file, uniqueValueCount, fieldSpec));
      } else {
        forwardIndexCreatorMap.put(column, new SingleValueUnsortedForwardIndexCreator(fieldSpec, file, uniqueValueCount, totalDocs, indexCreationInfo.getTotalNumberOfEntries(), indexCreationInfo.hasNulls()));
      }
    } else {
      if (buildRawIndex) {
        // TODO: Add support for multi-valued columns.
        throw new RuntimeException("Raw index generation not supported for multi-valued columns: " + column);
      }
      forwardIndexCreatorMap.put(column, new MultiValueUnsortedForwardIndexCreator(fieldSpec, file, uniqueValueCount, totalDocs, indexCreationInfo.getTotalNumberOfEntries(), indexCreationInfo.hasNulls()));
    }
  }

  // Initialize the inverted index creators for the configured columns that exist in the schema
  for (String column : config.getInvertedIndexCreationColumns()) {
    if (!schema.hasColumn(column)) {
      LOGGER.warn("Skipping enabling index on column:{} since its missing in schema", column);
      continue;
    }
    ColumnIndexCreationInfo indexCreationInfo = indexCreationInfoMap.get(column);
    int uniqueValueCount = indexCreationInfo.getDistinctValueCount();
    OffHeapBitmapInvertedIndexCreator invertedIndexCreator = new OffHeapBitmapInvertedIndexCreator(file, uniqueValueCount, totalDocs, indexCreationInfo.getTotalNumberOfEntries(), schema.getFieldSpecFor(column));
    invertedIndexCreatorMap.put(column, invertedIndexCreator);
  }
}
Use of com.linkedin.pinot.core.segment.creator.impl.fwd.MultiValueUnsortedForwardIndexCreator in the pinot project by LinkedIn.
Class BaseDefaultColumnHandler, method createColumnV1Indices.
/**
 * Creates the V1 indices (dictionary and forward index) for a column whose every document holds
 * the field's default null value, and records the column metadata in the segment properties.
 *
 * <p>The dictionary holds a single entry (the default null value), so every document is indexed
 * with dictionary id 0.
 *
 * @param column column name.
 * @throws UnsupportedOperationException if the column's data type is not one of STRING, INT,
 *         LONG, FLOAT or DOUBLE.
 * @throws Exception if creating the dictionary or the forward index fails.
 */
protected void createColumnV1Indices(String column) throws Exception {
  FieldSpec fieldSpec = _schema.getFieldSpecFor(column);
  Preconditions.checkNotNull(fieldSpec);

  // Document counts come from the existing segment metadata.
  int numDocs = _segmentMetadata.getTotalDocs();
  int numRawDocs = _segmentMetadata.getTotalRawDocs();
  int numAggDocs = numDocs - numRawDocs;

  FieldSpec.DataType type = fieldSpec.getDataType();
  Object nullValue = fieldSpec.getDefaultNullValue();
  boolean singleValued = fieldSpec.isSingleValueField();
  int maxMultiValues;
  if (singleValued) {
    maxMultiValues = 0;
  } else {
    maxMultiValues = 1;
  }

  // Build the one-element dictionary content, typed according to the column's data type.
  int elementSize = 0;
  Object dictionaryValues;
  switch (type) {
    case STRING: {
      Preconditions.checkState(nullValue instanceof String);
      String text = (String) nullValue;
      // Element size is the length of the UTF-8 encoded byte array.
      byte[] encoded = text.getBytes("UTF8");
      elementSize = encoded.length;
      dictionaryValues = new String[] { text };
      break;
    }
    case INT: {
      Preconditions.checkState(nullValue instanceof Integer);
      int intValue = (Integer) nullValue;
      dictionaryValues = new int[] { intValue };
      break;
    }
    case LONG: {
      Preconditions.checkState(nullValue instanceof Long);
      long longValue = (Long) nullValue;
      dictionaryValues = new long[] { longValue };
      break;
    }
    case FLOAT: {
      Preconditions.checkState(nullValue instanceof Float);
      float floatValue = (Float) nullValue;
      dictionaryValues = new float[] { floatValue };
      break;
    }
    case DOUBLE: {
      Preconditions.checkState(nullValue instanceof Double);
      double doubleValue = (Double) nullValue;
      dictionaryValues = new double[] { doubleValue };
      break;
    }
    default:
      throw new UnsupportedOperationException("Unsupported data type: " + type + " for column: " + column);
  }

  // Column index creation information for the auto-generated column.
  ColumnIndexCreationInfo creationInfo = new ColumnIndexCreationInfo(
      true /*createDictionary*/,
      nullValue /*min*/,
      nullValue /*max*/,
      dictionaryValues,
      ForwardIndexType.FIXED_BIT_COMPRESSED,
      InvertedIndexType.SORTED_INDEX,
      singleValued /*isSortedColumn*/,
      false /*hasNulls*/,
      numDocs /*totalNumberOfEntries*/,
      maxMultiValues,
      -1 /* Unused max length*/,
      true /*isAutoGenerated*/,
      nullValue);

  // Dictionary: contains only the default null value.
  SegmentDictionaryCreator dictionaryCreator = new SegmentDictionaryCreator(
      false /*hasNulls*/, dictionaryValues, fieldSpec, _indexDir, V1Constants.Str.DEFAULT_STRING_PAD_CHAR);
  dictionaryCreator.build(new boolean[] { true });
  dictionaryCreator.close();

  // Forward index: every document refers to dictionary id 0.
  if (!singleValued) {
    // Multi-value column.
    MultiValueUnsortedForwardIndexCreator multiValueCreator = new MultiValueUnsortedForwardIndexCreator(
        fieldSpec, _indexDir, 1 /*cardinality*/, numDocs /*numDocs*/, numDocs /*totalNumberOfValues*/, false);
    int[] onlyDictionaryId = { 0 };
    int doc = 0;
    while (doc < numDocs) {
      multiValueCreator.index(doc, onlyDictionaryId);
      doc++;
    }
    multiValueCreator.close();
  } else {
    // Single-value column.
    SingleValueSortedForwardIndexCreator singleValueCreator =
        new SingleValueSortedForwardIndexCreator(_indexDir, 1 /*cardinality*/, fieldSpec);
    int doc = 0;
    while (doc < numDocs) {
      singleValueCreator.add(0 /*dictionaryId*/, doc);
      doc++;
    }
    singleValueCreator.close();
  }

  // Add the column metadata information to the metadata properties.
  SegmentColumnarIndexCreator.addColumnMetadataInfo(_segmentProperties, column, creationInfo, numDocs, numRawDocs,
      numAggDocs, fieldSpec, true /*hasDictionary*/, elementSize, true /*hasInvertedIndex*/, null);
}
Aggregations