Use of com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo in project pinot by linkedin.
The class SegmentIndexCreationDriverImpl, method buildIndexCreationInfo:
/**
* Complete the stats gathering process and store the stats information in indexCreationInfoMap.
*/
void buildIndexCreationInfo() throws Exception {
  for (FieldSpec spec : dataSchema.getAllFieldSpecs()) {
    String column = spec.getName();
    indexCreationInfoMap.put(column, new ColumnIndexCreationInfo(
        true, /*createDictionary*/
        segmentStats.getColumnProfileFor(column).getMinValue(),
        segmentStats.getColumnProfileFor(column).getMaxValue(),
        segmentStats.getColumnProfileFor(column).getUniqueValuesSet(),
        ForwardIndexType.FIXED_BIT_COMPRESSED,
        InvertedIndexType.ROARING_BITMAPS,
        segmentStats.getColumnProfileFor(column).isSorted(),
        segmentStats.getColumnProfileFor(column).hasNull(),
        segmentStats.getColumnProfileFor(column).getTotalNumberOfEntries(),
        segmentStats.getColumnProfileFor(column).getMaxNumberOfMultiValues(),
        segmentStats.getColumnProfileFor(column).getLengthOfLargestElement(),
        false, /*isAutoGenerated*/
        dataSchema.getFieldSpecFor(column).getDefaultNullValue()));
  }
  segmentIndexCreationInfo.setTotalDocs(totalDocs);
  segmentIndexCreationInfo.setTotalRawDocs(totalRawDocs);
  segmentIndexCreationInfo.setTotalAggDocs(totalAggDocs);
  segmentIndexCreationInfo.setStarTreeEnabled(createStarTree);
  segmentIndexCreationInfo.setTotalConversions(extractor.getTotalConversions());
  segmentIndexCreationInfo.setTotalErrors(extractor.getTotalErrors());
  segmentIndexCreationInfo.setTotalNullCols(extractor.getTotalNullCols());
  segmentIndexCreationInfo.setTotalNulls(extractor.getTotalNulls());
}
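For reference, the ColumnIndexCreationInfo constructor takes its arguments in the order visible above: createDictionary flag, min value, max value, sorted unique values, forward index type, inverted index type, sorted flag, null flag, total number of entries, max number of multi-values, length of the largest element, auto-generated flag, and default null value. The snippet below is a minimal sketch of a direct call with that order; the column name and statistics are hypothetical placeholder values, not taken from a real stats collector.

// Sketch only: hypothetical stats for a single-valued, sorted INT column.
ColumnIndexCreationInfo intColumnInfo = new ColumnIndexCreationInfo(
    true, /*createDictionary*/
    0, /*min value (placeholder)*/
    100, /*max value (placeholder)*/
    new int[] { 0, 25, 50, 75, 100 }, /*sorted unique values (placeholder)*/
    ForwardIndexType.FIXED_BIT_COMPRESSED,
    InvertedIndexType.ROARING_BITMAPS,
    true, /*isSorted*/
    false, /*hasNulls*/
    1000, /*totalNumberOfEntries (placeholder)*/
    0, /*maxNumberOfMultiValues: single-valued column*/
    -1, /*lengthOfLargestElement: unused for INT*/
    false, /*isAutoGenerated*/
    0 /*default null value (placeholder)*/);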
Use of com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo in project pinot by linkedin.
The class SegmentColumnarIndexCreator, method init:
@Override
public void init(SegmentGeneratorConfig segmentCreationSpec, SegmentIndexCreationInfo segmentIndexCreationInfo,
    Map<String, ColumnIndexCreationInfo> indexCreationInfoMap, Schema schema, File outDir) throws Exception {
  docIdCounter = 0;
  config = segmentCreationSpec;
  this.indexCreationInfoMap = indexCreationInfoMap;
  dictionaryCreatorMap = new HashMap<String, SegmentDictionaryCreator>();
  forwardIndexCreatorMap = new HashMap<String, ForwardIndexCreator>();
  invertedIndexCreatorMap = new HashMap<String, InvertedIndexCreator>();
  file = outDir;
  // Check that the output directory does not exist
  if (file.exists()) {
    throw new RuntimeException("Segment output directory " + file.getAbsolutePath() + " already exists.");
  }
  file.mkdir();
  this.schema = schema;
  this.totalDocs = segmentIndexCreationInfo.getTotalDocs();
  this.totalAggDocs = segmentIndexCreationInfo.getTotalAggDocs();
  this.totalRawDocs = segmentIndexCreationInfo.getTotalRawDocs();
  this.totalErrors = segmentIndexCreationInfo.getTotalErrors();
  this.totalNulls = segmentIndexCreationInfo.getTotalNulls();
  this.totalConversions = segmentIndexCreationInfo.getTotalConversions();
  this.totalNullCols = segmentIndexCreationInfo.getTotalNullCols();
  this.paddingCharacter = segmentCreationSpec.getPaddingCharacter();
  // Initialize and build dictionaries
  for (final FieldSpec spec : schema.getAllFieldSpecs()) {
    String column = spec.getName();
    final ColumnIndexCreationInfo info = indexCreationInfoMap.get(column);
    if (createDictionaryForColumn(info, config, spec)) {
      dictionaryCreatorMap.put(column,
          new SegmentDictionaryCreator(info.hasNulls(), info.getSortedUniqueElementsArray(), spec, file, paddingCharacter));
    }
  }
  // For each column, build its dictionary and initialize a forward and an inverted index
  for (final String column : indexCreationInfoMap.keySet()) {
    ColumnIndexCreationInfo indexCreationInfo = indexCreationInfoMap.get(column);
    boolean[] isSorted = new boolean[1];
    isSorted[0] = indexCreationInfo.isSorted();
    SegmentDictionaryCreator dictionaryCreator = dictionaryCreatorMap.get(column);
    if (dictionaryCreator != null) {
      dictionaryCreator.build(isSorted);
      indexCreationInfo.setSorted(isSorted[0]);
      dictionaryCache.put(column, new HashMap<Object, Object>());
    }
    int uniqueValueCount = indexCreationInfo.getDistinctValueCount();
    int maxLength = indexCreationInfo.getLegnthOfLongestEntry();
    boolean buildRawIndex = config.getRawIndexCreationColumns().contains(column);
    FieldSpec fieldSpec = schema.getFieldSpecFor(column);
    if (fieldSpec.isSingleValueField()) {
      // Raw indexes store actual values, instead of dictionary ids.
      if (buildRawIndex) {
        forwardIndexCreatorMap.put(column,
            getRawIndexCreatorForColumn(file, column, fieldSpec.getDataType(), totalDocs, maxLength));
      } else {
        if (indexCreationInfo.isSorted()) {
          forwardIndexCreatorMap.put(column, new SingleValueSortedForwardIndexCreator(file, uniqueValueCount, fieldSpec));
        } else {
          forwardIndexCreatorMap.put(column, new SingleValueUnsortedForwardIndexCreator(fieldSpec, file, uniqueValueCount,
              totalDocs, indexCreationInfo.getTotalNumberOfEntries(), indexCreationInfo.hasNulls()));
        }
      }
    } else {
      if (buildRawIndex) {
        // TODO: Add support for multi-valued columns.
        throw new RuntimeException("Raw index generation not supported for multi-valued columns: " + column);
      }
      forwardIndexCreatorMap.put(column, new MultiValueUnsortedForwardIndexCreator(fieldSpec, file, uniqueValueCount,
          totalDocs, indexCreationInfo.getTotalNumberOfEntries(), indexCreationInfo.hasNulls()));
    }
  }
  for (String column : config.getInvertedIndexCreationColumns()) {
    if (!schema.hasColumn(column)) {
      LOGGER.warn("Skipping enabling index on column:{} since it is missing in schema", column);
      continue;
    }
    ColumnIndexCreationInfo indexCreationInfo = indexCreationInfoMap.get(column);
    int uniqueValueCount = indexCreationInfo.getDistinctValueCount();
    OffHeapBitmapInvertedIndexCreator invertedIndexCreator = new OffHeapBitmapInvertedIndexCreator(file,
        uniqueValueCount, totalDocs, indexCreationInfo.getTotalNumberOfEntries(), schema.getFieldSpecFor(column));
    invertedIndexCreatorMap.put(column, invertedIndexCreator);
  }
}
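The branching inside the second loop mixes three concerns: raw versus dictionary-encoded storage, sorted versus unsorted single-value columns, and multi-value columns. The hypothetical helper below restates that selection logic in one place; it is not part of SegmentColumnarIndexCreator's API, just a readability sketch that reuses only the constructors and accessors already shown in init().

// Hypothetical helper (not in the Pinot source): same forward-index selection as above.
private ForwardIndexCreator chooseForwardIndexCreator(File indexDir, String column, FieldSpec fieldSpec,
    ColumnIndexCreationInfo info, boolean buildRawIndex, int totalDocs, int maxLength) throws Exception {
  int uniqueValueCount = info.getDistinctValueCount();
  if (fieldSpec.isSingleValueField()) {
    if (buildRawIndex) {
      // Raw indexes store actual values instead of dictionary ids.
      return getRawIndexCreatorForColumn(indexDir, column, fieldSpec.getDataType(), totalDocs, maxLength);
    }
    if (info.isSorted()) {
      return new SingleValueSortedForwardIndexCreator(indexDir, uniqueValueCount, fieldSpec);
    }
    return new SingleValueUnsortedForwardIndexCreator(fieldSpec, indexDir, uniqueValueCount, totalDocs,
        info.getTotalNumberOfEntries(), info.hasNulls());
  }
  if (buildRawIndex) {
    // Raw index generation is only implemented for single-valued columns.
    throw new RuntimeException("Raw index generation not supported for multi-valued columns: " + column);
  }
  return new MultiValueUnsortedForwardIndexCreator(fieldSpec, indexDir, uniqueValueCount, totalDocs,
      info.getTotalNumberOfEntries(), info.hasNulls());
}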
Use of com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo in project pinot by linkedin.
The class BaseDefaultColumnHandler, method createColumnV1Indices:
/**
* Helper method to create the V1 indices (dictionary and forward index) for a column.
*
* @param column column name.
*/
protected void createColumnV1Indices(String column) throws Exception {
  FieldSpec fieldSpec = _schema.getFieldSpecFor(column);
  Preconditions.checkNotNull(fieldSpec);
  // Generate column index creation information.
  int totalDocs = _segmentMetadata.getTotalDocs();
  int totalRawDocs = _segmentMetadata.getTotalRawDocs();
  int totalAggDocs = totalDocs - totalRawDocs;
  FieldSpec.DataType dataType = fieldSpec.getDataType();
  Object defaultValue = fieldSpec.getDefaultNullValue();
  boolean isSingleValue = fieldSpec.isSingleValueField();
  int maxNumberOfMultiValueElements = isSingleValue ? 0 : 1;
  int dictionaryElementSize = 0;
  Object sortedArray;
  switch (dataType) {
    case STRING:
      Preconditions.checkState(defaultValue instanceof String);
      String stringDefaultValue = (String) defaultValue;
      // Length of the UTF-8 encoded byte array.
      dictionaryElementSize = stringDefaultValue.getBytes("UTF8").length;
      sortedArray = new String[] { stringDefaultValue };
      break;
    case INT:
      Preconditions.checkState(defaultValue instanceof Integer);
      sortedArray = new int[] { (Integer) defaultValue };
      break;
    case LONG:
      Preconditions.checkState(defaultValue instanceof Long);
      sortedArray = new long[] { (Long) defaultValue };
      break;
    case FLOAT:
      Preconditions.checkState(defaultValue instanceof Float);
      sortedArray = new float[] { (Float) defaultValue };
      break;
    case DOUBLE:
      Preconditions.checkState(defaultValue instanceof Double);
      sortedArray = new double[] { (Double) defaultValue };
      break;
    default:
      throw new UnsupportedOperationException("Unsupported data type: " + dataType + " for column: " + column);
  }
  ColumnIndexCreationInfo columnIndexCreationInfo = new ColumnIndexCreationInfo(
      true, /*createDictionary*/
      defaultValue, /*min*/
      defaultValue, /*max*/
      sortedArray,
      ForwardIndexType.FIXED_BIT_COMPRESSED,
      InvertedIndexType.SORTED_INDEX,
      isSingleValue, /*isSortedColumn*/
      false, /*hasNulls*/
      totalDocs, /*totalNumberOfEntries*/
      maxNumberOfMultiValueElements,
      -1, /*unused max length*/
      true, /*isAutoGenerated*/
      defaultValue);
  // Create dictionary.
  // We will have only one value in the dictionary.
  SegmentDictionaryCreator segmentDictionaryCreator = new SegmentDictionaryCreator(false, /*hasNulls*/
      sortedArray, fieldSpec, _indexDir, V1Constants.Str.DEFAULT_STRING_PAD_CHAR);
  segmentDictionaryCreator.build(new boolean[] { true });
  segmentDictionaryCreator.close();
  // Create forward index.
  if (isSingleValue) {
    // Single-value column.
    SingleValueSortedForwardIndexCreator svFwdIndexCreator =
        new SingleValueSortedForwardIndexCreator(_indexDir, 1, /*cardinality*/ fieldSpec);
    for (int docId = 0; docId < totalDocs; docId++) {
      svFwdIndexCreator.add(0, /*dictionaryId*/ docId);
    }
    svFwdIndexCreator.close();
  } else {
    // Multi-value column.
    MultiValueUnsortedForwardIndexCreator mvFwdIndexCreator =
        new MultiValueUnsortedForwardIndexCreator(fieldSpec, _indexDir, 1, /*cardinality*/
            totalDocs, /*numDocs*/ totalDocs, /*totalNumberOfValues*/ false);
    int[] dictionaryIds = { 0 };
    for (int docId = 0; docId < totalDocs; docId++) {
      mvFwdIndexCreator.index(docId, dictionaryIds);
    }
    mvFwdIndexCreator.close();
  }
  // Add the column metadata information to the metadata properties.
  SegmentColumnarIndexCreator.addColumnMetadataInfo(_segmentProperties, column, columnIndexCreationInfo,
      totalDocs, totalRawDocs, totalAggDocs, fieldSpec, true, /*hasDictionary*/
      dictionaryElementSize, true, /*hasInvertedIndex*/ null);
}
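The net effect is easiest to see with concrete numbers. The comment block below is an illustration only, assuming a hypothetical INT column with default null value 0 and three existing documents:

// Illustration (hypothetical values): default value 0, totalDocs = 3.
//   dictionary     -> [0]          a single entry, dictionary id 0
//   forward index  -> [0, 0, 0]    every docId maps to dictionary id 0
// In other words, every existing document is backfilled with the column's default null value.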
Use of com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo in project pinot by linkedin.
The class SegmentColumnarIndexCreator, method writeMetadata:
void writeMetadata() throws ConfigurationException {
  PropertiesConfiguration properties =
      new PropertiesConfiguration(new File(file, V1Constants.MetadataKeys.METADATA_FILE_NAME));
  properties.setProperty(SEGMENT_CREATOR_VERSION, config.getCreatorVersion());
  properties.setProperty(SEGMENT_PADDING_CHARACTER,
      StringEscapeUtils.escapeJava(Character.toString(config.getPaddingCharacter())));
  properties.setProperty(SEGMENT_NAME, segmentName);
  properties.setProperty(TABLE_NAME, config.getTableName());
  properties.setProperty(DIMENSIONS, config.getDimensions());
  properties.setProperty(METRICS, config.getMetrics());
  properties.setProperty(TIME_COLUMN_NAME, config.getTimeColumnName());
  properties.setProperty(TIME_INTERVAL, "not_there");
  properties.setProperty(SEGMENT_TOTAL_RAW_DOCS, String.valueOf(totalRawDocs));
  properties.setProperty(SEGMENT_TOTAL_AGGREGATE_DOCS, String.valueOf(totalAggDocs));
  properties.setProperty(SEGMENT_TOTAL_DOCS, String.valueOf(totalDocs));
  properties.setProperty(STAR_TREE_ENABLED, String.valueOf(config.isEnableStarTreeIndex()));
  properties.setProperty(SEGMENT_TOTAL_ERRORS, String.valueOf(totalErrors));
  properties.setProperty(SEGMENT_TOTAL_NULLS, String.valueOf(totalNulls));
  properties.setProperty(SEGMENT_TOTAL_CONVERSIONS, String.valueOf(totalConversions));
  properties.setProperty(SEGMENT_TOTAL_NULL_COLS, String.valueOf(totalNullCols));
  StarTreeIndexSpec starTreeIndexSpec = config.getStarTreeIndexSpec();
  if (starTreeIndexSpec != null) {
    properties.setProperty(STAR_TREE_SPLIT_ORDER, starTreeIndexSpec.getDimensionsSplitOrder());
    properties.setProperty(STAR_TREE_MAX_LEAF_RECORDS, starTreeIndexSpec.getMaxLeafRecords());
    properties.setProperty(STAR_TREE_SKIP_STAR_NODE_CREATION_FOR_DIMENSIONS,
        starTreeIndexSpec.getSkipStarNodeCreationForDimensions());
    properties.setProperty(STAR_TREE_SKIP_MATERIALIZATION_CARDINALITY,
        starTreeIndexSpec.getskipMaterializationCardinalityThreshold());
    properties.setProperty(STAR_TREE_SKIP_MATERIALIZATION_FOR_DIMENSIONS,
        starTreeIndexSpec.getskipMaterializationForDimensions());
  }
  HllConfig hllConfig = config.getHllConfig();
  Map<String, String> derivedHllFieldToOriginMap = null;
  if (hllConfig != null) {
    properties.setProperty(SEGMENT_HLL_LOG2M, hllConfig.getHllLog2m());
    derivedHllFieldToOriginMap = hllConfig.getDerivedHllFieldToOriginMap();
  }
  String timeColumn = config.getTimeColumnName();
  if (indexCreationInfoMap.get(timeColumn) != null) {
    properties.setProperty(SEGMENT_START_TIME, indexCreationInfoMap.get(timeColumn).getMin());
    properties.setProperty(SEGMENT_END_TIME, indexCreationInfoMap.get(timeColumn).getMax());
    properties.setProperty(TIME_UNIT, config.getSegmentTimeUnit());
  }
  if (config.containsCustomProperty(SEGMENT_START_TIME)) {
    properties.setProperty(SEGMENT_START_TIME, config.getStartTime());
  }
  if (config.containsCustomProperty(SEGMENT_END_TIME)) {
    properties.setProperty(SEGMENT_END_TIME, config.getEndTime());
  }
  if (config.containsCustomProperty(TIME_UNIT)) {
    properties.setProperty(TIME_UNIT, config.getSegmentTimeUnit());
  }
  for (Map.Entry<String, String> entry : config.getCustomProperties().entrySet()) {
    properties.setProperty(entry.getKey(), entry.getValue());
  }
  for (Map.Entry<String, ColumnIndexCreationInfo> entry : indexCreationInfoMap.entrySet()) {
    String column = entry.getKey();
    ColumnIndexCreationInfo columnIndexCreationInfo = entry.getValue();
    SegmentDictionaryCreator dictionaryCreator = dictionaryCreatorMap.get(column);
    int dictionaryElementSize = (dictionaryCreator != null) ? dictionaryCreator.getStringColumnMaxLength() : 0;
    // TODO: after fixing the server-side dependency on HAS_INVERTED_INDEX and deployed, set HAS_INVERTED_INDEX properly
    // The hasInvertedIndex flag in segment metadata is picked up in ColumnMetadata, and will be used during the query
    // plan phase. If it is set to false, then inverted indexes are not used in queries even if they are created via table
    // configs on segment load. So, we set it to true here for now, until we fix the server to update the value inside
    // ColumnMetadata, export information to the query planner that the inverted index available is current and can be used.
    //
    // boolean hasInvertedIndex = invertedIndexCreatorMap.containsKey();
    boolean hasInvertedIndex = true;
    String hllOriginColumn = null;
    if (derivedHllFieldToOriginMap != null) {
      hllOriginColumn = derivedHllFieldToOriginMap.get(column);
    }
    addColumnMetadataInfo(properties, column, columnIndexCreationInfo, totalDocs, totalRawDocs, totalAggDocs,
        schema.getFieldSpecFor(column), dictionaryCreatorMap.containsKey(column), dictionaryElementSize,
        hasInvertedIndex, hllOriginColumn);
  }
  properties.save();
}
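Since the metadata is written through Apache Commons Configuration, it can be read back the same way. The snippet below is a minimal sketch assuming the same key constants statically imported by SegmentColumnarIndexCreator; the variable names are illustrative only.

// Sketch only: reading a few of the values written above back out of the metadata file.
PropertiesConfiguration metadata =
    new PropertiesConfiguration(new File(file, V1Constants.MetadataKeys.METADATA_FILE_NAME));
String writtenSegmentName = metadata.getString(SEGMENT_NAME);
int writtenTotalDocs = metadata.getInt(SEGMENT_TOTAL_DOCS);
int writtenTotalRawDocs = metadata.getInt(SEGMENT_TOTAL_RAW_DOCS);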