Search in sources :

Example 11 with FieldSpec

use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.

the class SegmentIndexCreationDriverImpl method buildIndexCreationInfo.

/**
 * Finishes statistics gathering: registers a {@code ColumnIndexCreationInfo} in
 * {@code indexCreationInfoMap} for every field spec of the data schema, then copies the
 * segment-level counters into {@code segmentIndexCreationInfo}.
 *
 * @throws Exception propagated from the statistics / schema lookups used below
 */
void buildIndexCreationInfo() throws Exception {
    for (FieldSpec fieldSpec : dataSchema.getAllFieldSpecs()) {
        final String columnName = fieldSpec.getName();
        // Arguments are listed one per line, in constructor order.
        final ColumnIndexCreationInfo columnInfo = new ColumnIndexCreationInfo(
            true, /*createDictionary*/
            segmentStats.getColumnProfileFor(columnName).getMinValue(),
            segmentStats.getColumnProfileFor(columnName).getMaxValue(),
            segmentStats.getColumnProfileFor(columnName).getUniqueValuesSet(),
            ForwardIndexType.FIXED_BIT_COMPRESSED,
            InvertedIndexType.ROARING_BITMAPS,
            segmentStats.getColumnProfileFor(columnName).isSorted(),
            segmentStats.getColumnProfileFor(columnName).hasNull(),
            segmentStats.getColumnProfileFor(columnName).getTotalNumberOfEntries(),
            segmentStats.getColumnProfileFor(columnName).getMaxNumberOfMultiValues(),
            segmentStats.getColumnProfileFor(columnName).getLengthOfLargestElement(),
            false, /*isAutoGenerated*/
            dataSchema.getFieldSpecFor(columnName).getDefaultNullValue());
        indexCreationInfoMap.put(columnName, columnInfo);
    }

    // Document counts and the star-tree flag tracked by this driver.
    segmentIndexCreationInfo.setTotalDocs(totalDocs);
    segmentIndexCreationInfo.setTotalRawDocs(totalRawDocs);
    segmentIndexCreationInfo.setTotalAggDocs(totalAggDocs);
    segmentIndexCreationInfo.setStarTreeEnabled(createStarTree);

    // Counters reported by the extractor.
    segmentIndexCreationInfo.setTotalConversions(extractor.getTotalConversions());
    segmentIndexCreationInfo.setTotalErrors(extractor.getTotalErrors());
    segmentIndexCreationInfo.setTotalNullCols(extractor.getTotalNullCols());
    segmentIndexCreationInfo.setTotalNulls(extractor.getTotalNulls());
}
Also used : ColumnIndexCreationInfo(com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec)

Example 12 with FieldSpec

use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.

the class SegmentColumnarIndexCreator method init.

/**
 * Prepares this creator for writing a segment: resets internal state, creates the output
 * directory, copies the segment-level counters, and sets up the dictionary, forward index and
 * inverted index creators for the configured columns.
 *
 * @param segmentCreationSpec generator configuration; supplies the padding character and the
 *        raw-index and inverted-index column lists
 * @param segmentIndexCreationInfo segment-level totals (docs, errors, nulls, conversions) copied
 *        into this creator
 * @param indexCreationInfoMap per-column index creation info, keyed by column name
 * @param schema schema whose field specs drive dictionary and index creation
 * @param outDir segment output directory; must not exist yet
 * @throws RuntimeException if {@code outDir} already exists or cannot be created, or if a raw
 *         index is requested for a multi-valued column
 * @throws Exception propagated from the dictionary and index creators used below
 */
@Override
public void init(SegmentGeneratorConfig segmentCreationSpec, SegmentIndexCreationInfo segmentIndexCreationInfo, Map<String, ColumnIndexCreationInfo> indexCreationInfoMap, Schema schema, File outDir) throws Exception {
    docIdCounter = 0;
    config = segmentCreationSpec;
    this.indexCreationInfoMap = indexCreationInfoMap;
    dictionaryCreatorMap = new HashMap<String, SegmentDictionaryCreator>();
    forwardIndexCreatorMap = new HashMap<String, ForwardIndexCreator>();
    invertedIndexCreatorMap = new HashMap<String, InvertedIndexCreator>();
    file = outDir;
    // Check that the output directory does not exist
    if (file.exists()) {
        throw new RuntimeException("Segment output directory " + file.getAbsolutePath() + " already exists.");
    }
    // Fail fast when the directory cannot be created instead of surfacing an obscure I/O error
    // from the first index writer. mkdirs() also creates missing parent directories.
    if (!file.mkdirs() && !file.isDirectory()) {
        throw new RuntimeException("Failed to create segment output directory " + file.getAbsolutePath());
    }
    this.schema = schema;
    this.totalDocs = segmentIndexCreationInfo.getTotalDocs();
    this.totalAggDocs = segmentIndexCreationInfo.getTotalAggDocs();
    this.totalRawDocs = segmentIndexCreationInfo.getTotalRawDocs();
    this.totalErrors = segmentIndexCreationInfo.getTotalErrors();
    this.totalNulls = segmentIndexCreationInfo.getTotalNulls();
    this.totalConversions = segmentIndexCreationInfo.getTotalConversions();
    this.totalNullCols = segmentIndexCreationInfo.getTotalNullCols();
    this.paddingCharacter = segmentCreationSpec.getPaddingCharacter();
    // Create a dictionary creator for every schema column that needs a dictionary
    for (final FieldSpec spec : schema.getAllFieldSpecs()) {
        String column = spec.getName();
        final ColumnIndexCreationInfo info = indexCreationInfoMap.get(column);
        if (createDictionaryForColumn(info, config, spec)) {
            dictionaryCreatorMap.put(column, new SegmentDictionaryCreator(info.hasNulls(), info.getSortedUniqueElementsArray(), spec, file, paddingCharacter));
        }
    }
    // For each column, build its dictionary (if any) and initialize its forward index creator
    for (final Map.Entry<String, ColumnIndexCreationInfo> entry : indexCreationInfoMap.entrySet()) {
        final String column = entry.getKey();
        final ColumnIndexCreationInfo indexCreationInfo = entry.getValue();
        SegmentDictionaryCreator dictionaryCreator = dictionaryCreatorMap.get(column);
        if (dictionaryCreator != null) {
            // build() receives the sorted flag in a one-element array; the (possibly updated)
            // element is written back to the column info afterwards.
            boolean[] isSorted = new boolean[] { indexCreationInfo.isSorted() };
            dictionaryCreator.build(isSorted);
            indexCreationInfo.setSorted(isSorted[0]);
            dictionaryCache.put(column, new HashMap<Object, Object>());
        }
        int uniqueValueCount = indexCreationInfo.getDistinctValueCount();
        int maxLength = indexCreationInfo.getLegnthOfLongestEntry();
        boolean buildRawIndex = config.getRawIndexCreationColumns().contains(column);
        FieldSpec fieldSpec = schema.getFieldSpecFor(column);
        if (fieldSpec.isSingleValueField()) {
            // Raw indexes store actual values, instead of dictionary ids.
            if (buildRawIndex) {
                forwardIndexCreatorMap.put(column, getRawIndexCreatorForColumn(file, column, fieldSpec.getDataType(), totalDocs, maxLength));
            } else if (indexCreationInfo.isSorted()) {
                forwardIndexCreatorMap.put(column, new SingleValueSortedForwardIndexCreator(file, uniqueValueCount, fieldSpec));
            } else {
                forwardIndexCreatorMap.put(column, new SingleValueUnsortedForwardIndexCreator(fieldSpec, file, uniqueValueCount, totalDocs, indexCreationInfo.getTotalNumberOfEntries(), indexCreationInfo.hasNulls()));
            }
        } else {
            if (buildRawIndex) {
                // TODO: Add support for multi-valued columns.
                throw new RuntimeException("Raw index generation not supported for multi-valued columns: " + column);
            }
            forwardIndexCreatorMap.put(column, new MultiValueUnsortedForwardIndexCreator(fieldSpec, file, uniqueValueCount, totalDocs, indexCreationInfo.getTotalNumberOfEntries(), indexCreationInfo.hasNulls()));
        }
    }
    // Inverted indexes are created only for the configured columns that exist in the schema
    for (String column : config.getInvertedIndexCreationColumns()) {
        if (!schema.hasColumn(column)) {
            LOGGER.warn("Skipping enabling index on column:{} since its missing in schema", column);
            continue;
        }
        ColumnIndexCreationInfo indexCreationInfo = indexCreationInfoMap.get(column);
        int uniqueValueCount = indexCreationInfo.getDistinctValueCount();
        OffHeapBitmapInvertedIndexCreator invertedIndexCreator = new OffHeapBitmapInvertedIndexCreator(file, uniqueValueCount, totalDocs, indexCreationInfo.getTotalNumberOfEntries(), schema.getFieldSpecFor(column));
        invertedIndexCreatorMap.put(column, invertedIndexCreator);
    }
}
Also used : SingleValueSortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueSortedForwardIndexCreator) SingleValueForwardIndexCreator(com.linkedin.pinot.core.segment.creator.SingleValueForwardIndexCreator) SingleValueSortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueSortedForwardIndexCreator) SingleValueUnsortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueUnsortedForwardIndexCreator) MultiValueUnsortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.MultiValueUnsortedForwardIndexCreator) ForwardIndexCreator(com.linkedin.pinot.core.segment.creator.ForwardIndexCreator) MultiValueForwardIndexCreator(com.linkedin.pinot.core.segment.creator.MultiValueForwardIndexCreator) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) OffHeapBitmapInvertedIndexCreator(com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator) InvertedIndexCreator(com.linkedin.pinot.core.segment.creator.InvertedIndexCreator) OffHeapBitmapInvertedIndexCreator(com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator) ColumnIndexCreationInfo(com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo) SingleValueUnsortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueUnsortedForwardIndexCreator) MultiValueUnsortedForwardIndexCreator(com.linkedin.pinot.core.segment.creator.impl.fwd.MultiValueUnsortedForwardIndexCreator)

Example 13 with FieldSpec

use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.

the class RealtimeMetricsSerDe method serialize.

/**
 * Writes the metric values of {@code row} into a newly allocated buffer of
 * {@code metricBuffSizeInBytes} bytes, visiting metrics in the order returned by
 * {@code schema.getMetricNames()}.
 *
 * <p>Only INT, LONG, FLOAT and DOUBLE metrics are written; a metric of any other data type
 * contributes nothing to the buffer.
 *
 * @param row row supplying the value of each metric column
 * @return the buffer as left by the last write (it is not flipped or rewound here)
 */
public ByteBuffer serialize(GenericRow row) {
    final ByteBuffer buffer = ByteBuffer.allocate(metricBuffSizeInBytes);
    for (String metricName : schema.getMetricNames()) {
        final Object value = row.getValue(metricName);
        switch (schema.getFieldSpecFor(metricName).getDataType()) {
            case INT:
                buffer.putInt((Integer) value);
                break;
            case LONG:
                buffer.putLong((Long) value);
                break;
            case FLOAT:
                buffer.putFloat((Float) value);
                break;
            case DOUBLE:
                buffer.putDouble((Double) value);
                break;
            default:
                // Other data types are intentionally skipped: nothing is written for them.
                break;
        }
    }
    return buffer;
}
Also used : ByteBuffer(java.nio.ByteBuffer) FieldSpec(com.linkedin.pinot.common.data.FieldSpec)

Example 14 with FieldSpec

use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.

the class RealtimeFileBasedReaderTest method testDataSourceWithoutPredicateForSingleValueTimeColumns.

/**
 * For every single-value TIME column in the schema, scans the offline and realtime data sources
 * without a predicate and asserts that both yield the same sequence of dictionary-decoded values
 * and the same number of entries.
 */
private void testDataSourceWithoutPredicateForSingleValueTimeColumns() {
    for (FieldSpec spec : schema.getAllFieldSpecs()) {
        if (!spec.isSingleValueField() || spec.getFieldType() != FieldType.TIME) {
            continue;
        }
        String column = spec.getName();
        DataSource offlineDS = offlineSegment.getDataSource(column);
        DataSource realtimeDS = realtimeSegment.getDataSource(column);
        Block offlineBlock = offlineDS.nextBlock();
        Block realtimeBlock = realtimeDS.nextBlock();
        BlockMetadata offlineMetadata = offlineBlock.getMetadata();
        BlockMetadata realtimeMetadata = realtimeBlock.getMetadata();
        BlockSingleValIterator offlineValIterator = (BlockSingleValIterator) offlineBlock.getBlockValueSet().iterator();
        BlockSingleValIterator realtimeValIterator = (BlockSingleValIterator) realtimeBlock.getBlockValueSet().iterator();
        Assert.assertEquals(offlineSegment.getSegmentMetadata().getTotalDocs(), realtimeSegment.getAggregateDocumentCount());
        // Advance only while BOTH iterators have values, so a shorter offline iterator is never
        // read past its end; the length mismatch is reported by the final assertion instead.
        while (offlineValIterator.hasNext() && realtimeValIterator.hasNext()) {
            int offlineDicId = offlineValIterator.nextIntVal();
            int realtimeDicId = realtimeValIterator.nextIntVal();
            Assert.assertEquals(offlineMetadata.getDictionary().get(offlineDicId), realtimeMetadata.getDictionary().get(realtimeDicId));
        }
        // Both iterators must be exhausted at the same time.
        Assert.assertEquals(offlineValIterator.hasNext(), realtimeValIterator.hasNext());
    }
}
Also used : BlockSingleValIterator(com.linkedin.pinot.core.common.BlockSingleValIterator) BlockMetadata(com.linkedin.pinot.core.common.BlockMetadata) Block(com.linkedin.pinot.core.common.Block) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DataSource(com.linkedin.pinot.core.common.DataSource)

Example 15 with FieldSpec

use of com.linkedin.pinot.common.data.FieldSpec in project pinot by linkedin.

the class RealtimeFileBasedReaderTest method testDataSourceWithoutPredicateForSingleValueMetricColumns.

/**
 * For every single-value METRIC column in the schema, scans the offline and realtime data
 * sources without a predicate and asserts that both yield the same sequence of values and the
 * same number of entries.
 *
 * <p>The realtime side may have no dictionary, in which case the int read from its iterator is
 * compared directly; the offline side is always decoded through its dictionary.
 */
private void testDataSourceWithoutPredicateForSingleValueMetricColumns() {
    for (FieldSpec spec : schema.getAllFieldSpecs()) {
        if (!spec.isSingleValueField() || spec.getFieldType() != FieldType.METRIC) {
            continue;
        }
        String column = spec.getName();
        DataSource offlineDS = offlineSegment.getDataSource(column);
        DataSource realtimeDS = realtimeSegment.getDataSource(column);
        Block offlineBlock = offlineDS.nextBlock();
        Block realtimeBlock = realtimeDS.nextBlock();
        BlockMetadata offlineMetadata = offlineBlock.getMetadata();
        BlockMetadata realtimeMetadata = realtimeBlock.getMetadata();
        BlockSingleValIterator offlineValIterator = (BlockSingleValIterator) offlineBlock.getBlockValueSet().iterator();
        BlockSingleValIterator realtimeValIterator = (BlockSingleValIterator) realtimeBlock.getBlockValueSet().iterator();
        Assert.assertEquals(offlineSegment.getSegmentMetadata().getTotalDocs(), realtimeSegment.getAggregateDocumentCount());
        // Advance only while BOTH iterators have values, so a shorter offline iterator is never
        // read past its end; the length mismatch is reported by the final assertion instead.
        while (offlineValIterator.hasNext() && realtimeValIterator.hasNext()) {
            int offlineDicId = offlineValIterator.nextIntVal();
            int realtimeDicId = realtimeValIterator.nextIntVal();
            Object value;
            if (realtimeMetadata.hasDictionary()) {
                value = realtimeMetadata.getDictionary().get(realtimeDicId);
            } else {
                // No realtime dictionary: the iterator value is used as-is.
                value = realtimeDicId;
            }
            Assert.assertEquals(offlineMetadata.getDictionary().get(offlineDicId), value);
        }
        // Both iterators must be exhausted at the same time.
        Assert.assertEquals(offlineValIterator.hasNext(), realtimeValIterator.hasNext());
    }
}
Also used : BlockSingleValIterator(com.linkedin.pinot.core.common.BlockSingleValIterator) BlockMetadata(com.linkedin.pinot.core.common.BlockMetadata) Block(com.linkedin.pinot.core.common.Block) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DataSource(com.linkedin.pinot.core.common.DataSource)

Aggregations

FieldSpec (com.linkedin.pinot.common.data.FieldSpec) 52, DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec) 28, Test (org.testng.annotations.Test) 15, TimeFieldSpec (com.linkedin.pinot.common.data.TimeFieldSpec) 14, MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec) 13, File (java.io.File) 11, Schema (com.linkedin.pinot.common.data.Schema) 10, SegmentDictionaryCreator (com.linkedin.pinot.core.segment.creator.impl.SegmentDictionaryCreator) 7, HashMap (java.util.HashMap) 7, TimeGranularitySpec (com.linkedin.pinot.common.data.TimeGranularitySpec) 6, AbstractColumnStatisticsCollector (com.linkedin.pinot.core.segment.creator.AbstractColumnStatisticsCollector) 6, Random (java.util.Random) 5, Block (com.linkedin.pinot.core.common.Block) 4, BlockMetadata (com.linkedin.pinot.core.common.BlockMetadata) 4, DataSource (com.linkedin.pinot.core.common.DataSource) 4, GenericRow (com.linkedin.pinot.core.data.GenericRow) 4, SegmentGeneratorConfig (com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig) 4, SegmentIndexCreationDriverImpl (com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl) 4, ArrayList (java.util.ArrayList) 4, DataType (com.linkedin.pinot.common.data.FieldSpec.DataType) 3