Search in sources:

Example 1 with ColumnIndexCreationInfo

Use of com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo in project pinot by LinkedIn.

In class SegmentIndexCreationDriverImpl, method buildIndexCreationInfo.

/**
 * Completes the stats gathering process.
 *
 * <p>For every field spec in {@code dataSchema}, a {@link ColumnIndexCreationInfo} is built from the
 * column profile exposed by {@code segmentStats} and stored in {@code indexCreationInfoMap} under the
 * column name. Afterwards the segment-wide counters (doc counts, star-tree flag and the extractor's
 * conversion/error/null totals) are copied into {@code segmentIndexCreationInfo}.
 *
 * @throws Exception declared by the signature; nothing is caught inside this method
 */
void buildIndexCreationInfo() throws Exception {
    // Column-level information: one entry per field spec of the schema.
    for (FieldSpec fieldSpec : dataSchema.getAllFieldSpecs()) {
        final String columnName = fieldSpec.getName();
        ColumnIndexCreationInfo columnInfo = new ColumnIndexCreationInfo(
            /* createDictionary */ true,
            /* min */ segmentStats.getColumnProfileFor(columnName).getMinValue(),
            /* max */ segmentStats.getColumnProfileFor(columnName).getMaxValue(),
            segmentStats.getColumnProfileFor(columnName).getUniqueValuesSet(),
            ForwardIndexType.FIXED_BIT_COMPRESSED,
            InvertedIndexType.ROARING_BITMAPS,
            /* isSortedColumn */ segmentStats.getColumnProfileFor(columnName).isSorted(),
            /* hasNulls */ segmentStats.getColumnProfileFor(columnName).hasNull(),
            /* totalNumberOfEntries */ segmentStats.getColumnProfileFor(columnName).getTotalNumberOfEntries(),
            segmentStats.getColumnProfileFor(columnName).getMaxNumberOfMultiValues(),
            segmentStats.getColumnProfileFor(columnName).getLengthOfLargestElement(),
            /* isAutoGenerated */ false,
            dataSchema.getFieldSpecFor(columnName).getDefaultNullValue());
        indexCreationInfoMap.put(columnName, columnInfo);
    }

    // Segment-level document counts and star-tree flag.
    segmentIndexCreationInfo.setTotalDocs(totalDocs);
    segmentIndexCreationInfo.setTotalRawDocs(totalRawDocs);
    segmentIndexCreationInfo.setTotalAggDocs(totalAggDocs);
    segmentIndexCreationInfo.setStarTreeEnabled(createStarTree);

    // Totals reported by the extractor.
    segmentIndexCreationInfo.setTotalConversions(extractor.getTotalConversions());
    segmentIndexCreationInfo.setTotalErrors(extractor.getTotalErrors());
    segmentIndexCreationInfo.setTotalNullCols(extractor.getTotalNullCols());
    segmentIndexCreationInfo.setTotalNulls(extractor.getTotalNulls());
}
Also used : ColumnIndexCreationInfo(com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec)

Example 2 with ColumnIndexCreationInfo

Use of com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo in project pinot by LinkedIn.

In class SegmentColumnarIndexCreator, method init.

/**
 * Prepares this creator for building a segment.
 *
 * <p>Resets the per-segment state, creates the output directory, creates and builds a dictionary for
 * every column that needs one, and sets up a forward index creator for every column in
 * {@code indexCreationInfoMap} plus an inverted index creator for every configured inverted index
 * column that is present in the schema.
 *
 * @param segmentCreationSpec segment generator configuration (raw/inverted index columns, padding character)
 * @param segmentIndexCreationInfo segment-level totals (docs, errors, nulls, conversions)
 * @param indexCreationInfoMap per-column index creation information, keyed by column name
 * @param schema schema of the segment being built
 * @param outDir directory to write the segment into; must not exist yet
 * @throws RuntimeException if {@code outDir} already exists, if it cannot be created, or if a raw index
 *         is requested for a multi-valued column
 * @throws Exception declared by the signature; failures of the underlying creators are not caught here
 */
@Override
public void init(SegmentGeneratorConfig segmentCreationSpec, SegmentIndexCreationInfo segmentIndexCreationInfo, Map<String, ColumnIndexCreationInfo> indexCreationInfoMap, Schema schema, File outDir) throws Exception {
    docIdCounter = 0;
    config = segmentCreationSpec;
    this.indexCreationInfoMap = indexCreationInfoMap;
    dictionaryCreatorMap = new HashMap<String, SegmentDictionaryCreator>();
    forwardIndexCreatorMap = new HashMap<String, ForwardIndexCreator>();
    invertedIndexCreatorMap = new HashMap<String, InvertedIndexCreator>();
    file = outDir;
    // Check that the output directory does not exist
    if (file.exists()) {
        throw new RuntimeException("Segment output directory " + file.getAbsolutePath() + " already exists.");
    }
    // Fail fast when the directory cannot be created instead of hitting an unrelated I/O error later on.
    if (!file.mkdir()) {
        throw new RuntimeException("Failed to create segment output directory " + file.getAbsolutePath());
    }
    this.schema = schema;
    this.totalDocs = segmentIndexCreationInfo.getTotalDocs();
    this.totalAggDocs = segmentIndexCreationInfo.getTotalAggDocs();
    this.totalRawDocs = segmentIndexCreationInfo.getTotalRawDocs();
    this.totalErrors = segmentIndexCreationInfo.getTotalErrors();
    this.totalNulls = segmentIndexCreationInfo.getTotalNulls();
    this.totalConversions = segmentIndexCreationInfo.getTotalConversions();
    this.totalNullCols = segmentIndexCreationInfo.getTotalNullCols();
    this.paddingCharacter = segmentCreationSpec.getPaddingCharacter();
    // Create (but do not build yet) a dictionary creator for every schema column that needs one
    for (final FieldSpec spec : schema.getAllFieldSpecs()) {
        String column = spec.getName();
        final ColumnIndexCreationInfo info = indexCreationInfoMap.get(column);
        if (createDictionaryForColumn(info, config, spec)) {
            dictionaryCreatorMap.put(column, new SegmentDictionaryCreator(info.hasNulls(), info.getSortedUniqueElementsArray(), spec, file, paddingCharacter));
        }
    }
    // For each column, build its dictionary (if any) and initialize a forward index
    for (final Map.Entry<String, ColumnIndexCreationInfo> entry : indexCreationInfoMap.entrySet()) {
        final String column = entry.getKey();
        ColumnIndexCreationInfo indexCreationInfo = entry.getValue();
        // One-element array used as an in/out parameter: build() may update the sorted flag.
        boolean[] isSorted = new boolean[1];
        isSorted[0] = indexCreationInfo.isSorted();
        SegmentDictionaryCreator dictionaryCreator = dictionaryCreatorMap.get(column);
        if (dictionaryCreator != null) {
            dictionaryCreator.build(isSorted);
            indexCreationInfo.setSorted(isSorted[0]);
            dictionaryCache.put(column, new HashMap<Object, Object>());
        }
        int uniqueValueCount = indexCreationInfo.getDistinctValueCount();
        int maxLength = indexCreationInfo.getLegnthOfLongestEntry();
        boolean buildRawIndex = config.getRawIndexCreationColumns().contains(column);
        FieldSpec fieldSpec = schema.getFieldSpecFor(column);
        if (fieldSpec.isSingleValueField()) {
            // Raw indexes store actual values, instead of dictionary ids.
            if (buildRawIndex) {
                forwardIndexCreatorMap.put(column, getRawIndexCreatorForColumn(file, column, fieldSpec.getDataType(), totalDocs, maxLength));
            } else if (indexCreationInfo.isSorted()) {
                forwardIndexCreatorMap.put(column, new SingleValueSortedForwardIndexCreator(file, uniqueValueCount, fieldSpec));
            } else {
                forwardIndexCreatorMap.put(column, new SingleValueUnsortedForwardIndexCreator(fieldSpec, file, uniqueValueCount, totalDocs, indexCreationInfo.getTotalNumberOfEntries(), indexCreationInfo.hasNulls()));
            }
        } else {
            if (buildRawIndex) {
                // TODO: Add support for multi-valued columns.
                throw new RuntimeException("Raw index generation not supported for multi-valued columns: " + column);
            }
            forwardIndexCreatorMap.put(column, new MultiValueUnsortedForwardIndexCreator(fieldSpec, file, uniqueValueCount, totalDocs, indexCreationInfo.getTotalNumberOfEntries(), indexCreationInfo.hasNulls()));
        }
    }
    // Initialize an inverted index creator for every configured column that exists in the schema
    for (String column : config.getInvertedIndexCreationColumns()) {
        if (!schema.hasColumn(column)) {
            LOGGER.warn("Skipping enabling index on column:{} since its missing in schema", column);
            continue;
        }
        ColumnIndexCreationInfo indexCreationInfo = indexCreationInfoMap.get(column);
        int uniqueValueCount = indexCreationInfo.getDistinctValueCount();
        OffHeapBitmapInvertedIndexCreator invertedIndexCreator = new OffHeapBitmapInvertedIndexCreator(file, uniqueValueCount, totalDocs, indexCreationInfo.getTotalNumberOfEntries(), schema.getFieldSpecFor(column));
        invertedIndexCreatorMap.put(column, invertedIndexCreator);
    }
}
Also used : SingleValueSortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueSortedForwardIndexCreator) SingleValueForwardIndexCreator(com.linkedin.pinot.core.segment.creator.SingleValueForwardIndexCreator) SingleValueSortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueSortedForwardIndexCreator) SingleValueUnsortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueUnsortedForwardIndexCreator) MultiValueUnsortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.MultiValueUnsortedForwardIndexCreator) ForwardIndexCreator(com.linkedin.pinot.core.segment.creator.ForwardIndexCreator) MultiValueForwardIndexCreator(com.linkedin.pinot.core.segment.creator.MultiValueForwardIndexCreator) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) OffHeapBitmapInvertedIndexCreator(com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator) InvertedIndexCreator(com.linkedin.pinot.core.segment.creator.InvertedIndexCreator) OffHeapBitmapInvertedIndexCreator(com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator) ColumnIndexCreationInfo(com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo) SingleValueUnsortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueUnsortedForwardIndexCreator) MultiValueUnsortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.MultiValueUnsortedForwardIndexCreator)

Example 3 with ColumnIndexCreationInfo

Use of com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo in project pinot by LinkedIn.

In class BaseDefaultColumnHandler, method createColumnV1Indices.

/**
 * Helper method to create the V1 indices (dictionary and forward index) for a column.
 *
 * <p>The dictionary holds a single entry, the field's default null value, and every document in the
 * segment is indexed with dictionary id 0. The column metadata is then added to
 * {@code _segmentProperties}.
 *
 * @param column column name.
 * @throws NullPointerException if the schema has no field spec for {@code column}
 * @throws IllegalStateException if the default null value does not match the field's data type
 * @throws UnsupportedOperationException if the field's data type is not one of STRING, INT, LONG, FLOAT, DOUBLE
 * @throws Exception declared by the signature; failures of the index creators are not caught here
 */
protected void createColumnV1Indices(String column) throws Exception {
    FieldSpec fieldSpec = _schema.getFieldSpecFor(column);
    Preconditions.checkNotNull(fieldSpec);
    // Generate column index creation information.
    int totalDocs = _segmentMetadata.getTotalDocs();
    int totalRawDocs = _segmentMetadata.getTotalRawDocs();
    int totalAggDocs = totalDocs - totalRawDocs;
    FieldSpec.DataType dataType = fieldSpec.getDataType();
    Object defaultValue = fieldSpec.getDefaultNullValue();
    boolean isSingleValue = fieldSpec.isSingleValueField();
    int maxNumberOfMultiValueElements = isSingleValue ? 0 : 1;
    int dictionaryElementSize = 0;
    // Single-element array of the matching primitive/String type, holding only the default value.
    Object sortedArray;
    switch(dataType) {
        case STRING:
            Preconditions.checkState(defaultValue instanceof String);
            String stringDefaultValue = (String) defaultValue;
            // Length of the UTF-8 encoded byte array.
            dictionaryElementSize = stringDefaultValue.getBytes("UTF8").length;
            sortedArray = new String[] { stringDefaultValue };
            break;
        case INT:
            Preconditions.checkState(defaultValue instanceof Integer);
            sortedArray = new int[] { (Integer) defaultValue };
            break;
        case LONG:
            Preconditions.checkState(defaultValue instanceof Long);
            sortedArray = new long[] { (Long) defaultValue };
            break;
        case FLOAT:
            Preconditions.checkState(defaultValue instanceof Float);
            sortedArray = new float[] { (Float) defaultValue };
            break;
        case DOUBLE:
            Preconditions.checkState(defaultValue instanceof Double);
            sortedArray = new double[] { (Double) defaultValue };
            break;
        default:
            throw new UnsupportedOperationException("Unsupported data type: " + dataType + " for column: " + column);
    }
    ColumnIndexCreationInfo columnIndexCreationInfo = new ColumnIndexCreationInfo(true, /*createDictionary*/
    defaultValue, /*min*/
    defaultValue, /*max*/
    sortedArray, ForwardIndexType.FIXED_BIT_COMPRESSED, InvertedIndexType.SORTED_INDEX, isSingleValue, /*isSortedColumn*/
    false, /*hasNulls*/
    totalDocs, /*totalNumberOfEntries*/
    maxNumberOfMultiValueElements, -1, /* Unused max length*/
    true, /*isAutoGenerated*/
    defaultValue);
    // Create dictionary.
    // We will have only one value in the dictionary.
    SegmentDictionaryCreator segmentDictionaryCreator = new SegmentDictionaryCreator(false, /*hasNulls*/
    sortedArray, fieldSpec, _indexDir, V1Constants.Str.DEFAULT_STRING_PAD_CHAR);
    try {
        segmentDictionaryCreator.build(new boolean[] { true });
    } finally {
        // Always release the creator, even when build() fails.
        segmentDictionaryCreator.close();
    }
    // Create forward index.
    if (isSingleValue) {
        // Single-value column.
        SingleValueSortedForwardIndexCreator svFwdIndexCreator = new SingleValueSortedForwardIndexCreator(_indexDir, 1, /*cardinality*/
        fieldSpec);
        try {
            for (int docId = 0; docId < totalDocs; docId++) {
                svFwdIndexCreator.add(0, /*dictionaryId*/
                docId);
            }
        } finally {
            // Always release the creator, even when indexing fails part-way.
            svFwdIndexCreator.close();
        }
    } else {
        // Multi-value column.
        MultiValueUnsortedForwardIndexCreator mvFwdIndexCreator = new MultiValueUnsortedForwardIndexCreator(fieldSpec, _indexDir, 1, /*cardinality*/
        totalDocs, /*numDocs*/
        totalDocs, /*totalNumberOfValues*/
        false);
        try {
            // Every document gets the same single dictionary id.
            int[] dictionaryIds = { 0 };
            for (int docId = 0; docId < totalDocs; docId++) {
                mvFwdIndexCreator.index(docId, dictionaryIds);
            }
        } finally {
            // Always release the creator, even when indexing fails part-way.
            mvFwdIndexCreator.close();
        }
    }
    // Add the column metadata information to the metadata properties.
    SegmentColumnarIndexCreator.addColumnMetadataInfo(_segmentProperties, column, columnIndexCreationInfo, totalDocs, totalRawDocs, totalAggDocs, fieldSpec, true, /*hasDictionary*/
    dictionaryElementSize, true, /*hasInvertedIndex*/
    null);
}
Also used : SegmentDictionaryCreator(com.linkedin.pinot.core.segment.creator.impl.SegmentDictionaryCreator) SingleValueSortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueSortedForwardIndexCreator) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) ColumnIndexCreationInfo(com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo) MultiValueUnsortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.MultiValueUnsortedForwardIndexCreator)

Example 4 with ColumnIndexCreationInfo

Use of com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo in project pinot by LinkedIn.

In class SegmentColumnarIndexCreator, method writeMetadata.

/**
 * Writes the segment metadata file into the segment directory.
 *
 * <p>Properties are written in this order: segment-level values, star-tree settings (when a spec is
 * configured), the HLL log2m (when an HLL config is present), the time range taken from the time
 * column's index creation info, overrides and custom properties from the config, and finally the
 * per-column metadata for every entry of {@code indexCreationInfoMap}.
 *
 * @throws ConfigurationException declared by the signature; raised by the properties configuration
 */
void writeMetadata() throws ConfigurationException {
    final File metadataFile = new File(file, V1Constants.MetadataKeys.METADATA_FILE_NAME);
    final PropertiesConfiguration properties = new PropertiesConfiguration(metadataFile);

    // Segment-level properties.
    properties.setProperty(SEGMENT_CREATOR_VERSION, config.getCreatorVersion());
    properties.setProperty(SEGMENT_PADDING_CHARACTER, StringEscapeUtils.escapeJava(Character.toString(config.getPaddingCharacter())));
    properties.setProperty(SEGMENT_NAME, segmentName);
    properties.setProperty(TABLE_NAME, config.getTableName());
    properties.setProperty(DIMENSIONS, config.getDimensions());
    properties.setProperty(METRICS, config.getMetrics());
    properties.setProperty(TIME_COLUMN_NAME, config.getTimeColumnName());
    properties.setProperty(TIME_INTERVAL, "not_there");
    properties.setProperty(SEGMENT_TOTAL_RAW_DOCS, String.valueOf(totalRawDocs));
    properties.setProperty(SEGMENT_TOTAL_AGGREGATE_DOCS, String.valueOf(totalAggDocs));
    properties.setProperty(SEGMENT_TOTAL_DOCS, String.valueOf(totalDocs));
    properties.setProperty(STAR_TREE_ENABLED, String.valueOf(config.isEnableStarTreeIndex()));
    properties.setProperty(SEGMENT_TOTAL_ERRORS, String.valueOf(totalErrors));
    properties.setProperty(SEGMENT_TOTAL_NULLS, String.valueOf(totalNulls));
    properties.setProperty(SEGMENT_TOTAL_CONVERSIONS, String.valueOf(totalConversions));
    properties.setProperty(SEGMENT_TOTAL_NULL_COLS, String.valueOf(totalNullCols));

    // Star-tree settings, only when a spec is configured.
    final StarTreeIndexSpec starTreeSpec = config.getStarTreeIndexSpec();
    if (starTreeSpec != null) {
        properties.setProperty(STAR_TREE_SPLIT_ORDER, starTreeSpec.getDimensionsSplitOrder());
        properties.setProperty(STAR_TREE_MAX_LEAF_RECORDS, starTreeSpec.getMaxLeafRecords());
        properties.setProperty(STAR_TREE_SKIP_STAR_NODE_CREATION_FOR_DIMENSIONS, starTreeSpec.getSkipStarNodeCreationForDimensions());
        properties.setProperty(STAR_TREE_SKIP_MATERIALIZATION_CARDINALITY, starTreeSpec.getskipMaterializationCardinalityThreshold());
        properties.setProperty(STAR_TREE_SKIP_MATERIALIZATION_FOR_DIMENSIONS, starTreeSpec.getskipMaterializationForDimensions());
    }

    // HLL settings; the derived-field map stays null when no HLL config is present.
    final HllConfig hllConfig = config.getHllConfig();
    final Map<String, String> hllDerivedToOrigin;
    if (hllConfig == null) {
        hllDerivedToOrigin = null;
    } else {
        properties.setProperty(SEGMENT_HLL_LOG2M, hllConfig.getHllLog2m());
        hllDerivedToOrigin = hllConfig.getDerivedHllFieldToOriginMap();
    }

    // Time range, taken from the min/max of the time column when index creation info exists for it.
    final String timeColumn = config.getTimeColumnName();
    final ColumnIndexCreationInfo timeColumnInfo = indexCreationInfoMap.get(timeColumn);
    if (timeColumnInfo != null) {
        properties.setProperty(SEGMENT_START_TIME, timeColumnInfo.getMin());
        properties.setProperty(SEGMENT_END_TIME, timeColumnInfo.getMax());
        properties.setProperty(TIME_UNIT, config.getSegmentTimeUnit());
    }

    // Values for which the config carries a custom property overwrite what was set above.
    if (config.containsCustomProperty(SEGMENT_START_TIME)) {
        properties.setProperty(SEGMENT_START_TIME, config.getStartTime());
    }
    if (config.containsCustomProperty(SEGMENT_END_TIME)) {
        properties.setProperty(SEGMENT_END_TIME, config.getEndTime());
    }
    if (config.containsCustomProperty(TIME_UNIT)) {
        properties.setProperty(TIME_UNIT, config.getSegmentTimeUnit());
    }
    for (Map.Entry<String, String> customProperty : config.getCustomProperties().entrySet()) {
        properties.setProperty(customProperty.getKey(), customProperty.getValue());
    }

    // Per-column metadata.
    for (Map.Entry<String, ColumnIndexCreationInfo> columnEntry : indexCreationInfoMap.entrySet()) {
        final String column = columnEntry.getKey();
        final ColumnIndexCreationInfo columnInfo = columnEntry.getValue();

        // Columns without a dictionary creator report an element size of 0.
        final SegmentDictionaryCreator dictionaryCreator = dictionaryCreatorMap.get(column);
        final int dictionaryElementSize;
        if (dictionaryCreator == null) {
            dictionaryElementSize = 0;
        } else {
            dictionaryElementSize = dictionaryCreator.getStringColumnMaxLength();
        }

        // TODO: set HAS_INVERTED_INDEX from invertedIndexCreatorMap once the server-side dependency on it
        // is fixed and deployed. The flag in segment metadata is picked up in ColumnMetadata and used
        // during the query plan phase: if it is false, inverted indexes are not used in queries even when
        // they are created via table configs on segment load. Until the server updates the value inside
        // ColumnMetadata itself, it is always written as true.
        final boolean hasInvertedIndex = true;

        final String hllOriginColumn = (hllDerivedToOrigin != null) ? hllDerivedToOrigin.get(column) : null;

        addColumnMetadataInfo(properties, column, columnInfo, totalDocs, totalRawDocs, totalAggDocs, schema.getFieldSpecFor(column), dictionaryCreatorMap.containsKey(column), dictionaryElementSize, hasInvertedIndex, hllOriginColumn);
    }
    properties.save();
}
Also used : HllConfig(com.linkedin.pinot.core.startree.hll.HllConfig) PropertiesConfiguration(org.apache.commons.configuration.PropertiesConfiguration) StarTreeIndexSpec(com.linkedin.pinot.common.data.StarTreeIndexSpec) ColumnIndexCreationInfo(com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo) File(java.io.File) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

ColumnIndexCreationInfo (com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo)4 FieldSpec (com.linkedin.pinot.common.data.FieldSpec)3 MultiValueUnsortedForwardIndexCreator (com.linkedin.pinot.core.segment.creator.impl.fwd.MultiValueUnsortedForwardIndexCreator)2 SingleValueSortedForwardIndexCreator (com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueSortedForwardIndexCreator)2 MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec)1 StarTreeIndexSpec (com.linkedin.pinot.common.data.StarTreeIndexSpec)1 ForwardIndexCreator (com.linkedin.pinot.core.segment.creator.ForwardIndexCreator)1 InvertedIndexCreator (com.linkedin.pinot.core.segment.creator.InvertedIndexCreator)1 MultiValueForwardIndexCreator (com.linkedin.pinot.core.segment.creator.MultiValueForwardIndexCreator)1 SingleValueForwardIndexCreator (com.linkedin.pinot.core.segment.creator.SingleValueForwardIndexCreator)1 SegmentDictionaryCreator (com.linkedin.pinot.core.segment.creator.impl.SegmentDictionaryCreator)1 SingleValueUnsortedForwardIndexCreator (com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueUnsortedForwardIndexCreator)1 OffHeapBitmapInvertedIndexCreator (com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator)1 HllConfig (com.linkedin.pinot.core.startree.hll.HllConfig)1 File (java.io.File)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 PropertiesConfiguration (org.apache.commons.configuration.PropertiesConfiguration)1