Search in sources :

Example 11 with ColumnMetadata

use of com.linkedin.pinot.core.segment.index.ColumnMetadata in project pinot by linkedin.

the class ForwardIndexReaderBenchmark method benchmarkForwardIndex.

/**
 * Runs the forward-index read benchmark for the columns of the segment in {@code indexDir}.
 *
 * <p>Multi-valued columns are handed to {@code multiValuedReadBenchMark}. Single-valued columns
 * that are not sorted are handed to {@code singleValuedReadBenchMark}. Sorted single-valued
 * columns are skipped.
 *
 * @param indexDir path of the segment directory; the segment metadata and the forward index
 *     files are resolved against it
 * @param includeColumns columns to benchmark; {@code null} or an empty list means every column
 *     of the segment
 * @throws Exception if loading the segment metadata or running a benchmark fails
 */
private static void benchmarkForwardIndex(String indexDir, List<String> includeColumns) throws Exception {
    SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(new File(indexDir));
    String segmentVersion = segmentMetadata.getVersion();
    boolean restrictToIncluded = includeColumns != null && !includeColumns.isEmpty();
    for (String column : segmentMetadata.getAllColumns()) {
        if (restrictToIncluded && !includeColumns.contains(column)) {
            continue;
        }
        ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column);
        boolean singleValue = columnMetadata.isSingleValue();
        // Only sorted single-valued columns are skipped. Skipping every single-valued column
        // up front would make the single-valued benchmark below unreachable.
        if (singleValue && columnMetadata.isSorted()) {
            continue;
        }
        String fwdIndexFileName = segmentMetadata.getForwardIndexFileName(column, segmentVersion);
        File fwdIndexFile = new File(indexDir, fwdIndexFileName);
        if (singleValue) {
            singleValuedReadBenchMark(segmentVersion, fwdIndexFile, segmentMetadata.getTotalDocs(), columnMetadata.getBitsPerElement());
        } else {
            multiValuedReadBenchMark(segmentVersion, fwdIndexFile, segmentMetadata.getTotalDocs(), columnMetadata.getTotalNumberOfEntries(), columnMetadata.getMaxNumberOfMultiValues(), columnMetadata.getBitsPerElement());
        }
    }
}
Also used : ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) RandomAccessFile(java.io.RandomAccessFile) File(java.io.File)

Example 12 with ColumnMetadata

use of com.linkedin.pinot.core.segment.index.ColumnMetadata in project pinot by linkedin.

the class PinotSegmentRecordReader method getSchema.

/**
 * Builds a {@link Schema} named after the segment, with one field per entry of {@code columns}.
 *
 * <p>The field spec is chosen from the column's field type: DIMENSION, METRIC or TIME. For any
 * other field type no spec is created and {@code null} is passed to {@code addField}.
 *
 * @return the schema assembled from the segment's column metadata
 */
@Override
public Schema getSchema() {
    final Schema result = new Schema();
    result.setSchemaName(segmentMetadata.getName());
    for (String column : columns) {
        final ColumnMetadata metadata = segmentMetadata.getColumnMetadataFor(column);
        final String name = metadata.getColumnName();
        final DataType type = metadata.getDataType();
        final FieldType kind = metadata.getFieldType();
        final FieldSpec spec;
        switch (kind) {
            case DIMENSION:
                spec = new DimensionFieldSpec(name, type, metadata.isSingleValue());
                break;
            case METRIC:
                spec = new MetricFieldSpec(name, type);
                break;
            case TIME:
                spec = new TimeFieldSpec(new TimeGranularitySpec(type, metadata.getTimeUnit(), name));
                break;
            default:
                // NOTE(review): unknown field types end up as a null spec handed to addField;
                // confirm that addField tolerates null.
                spec = null;
                break;
        }
        result.addField(spec);
    }
    return result;
}
Also used : TimeGranularitySpec(com.linkedin.pinot.common.data.TimeGranularitySpec) ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) Schema(com.linkedin.pinot.common.data.Schema) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) TimeUnit(java.util.concurrent.TimeUnit) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) FieldType(com.linkedin.pinot.common.data.FieldSpec.FieldType)

Example 13 with ColumnMetadata

use of com.linkedin.pinot.core.segment.index.ColumnMetadata in project pinot by linkedin.

the class IntArraysTest method test1.

/**
 * Loads the first file under {@code INDEX_DIR} once in heap mode and once in mmap mode, then
 * checks for every column and every document that both forward-index readers give equal results.
 *
 * <p>Single-valued columns are compared through {@code getInt}. Multi-valued columns are compared
 * through the value returned by {@code getIntArray}; the contents of the scratch arrays are not
 * compared.
 */
@Test
public void test1() throws Exception {
    final IndexSegmentImpl heapSegment = (IndexSegmentImpl) ColumnarSegmentLoader.load(INDEX_DIR.listFiles()[0], ReadMode.heap);
    final IndexSegmentImpl mmapSegment = (IndexSegmentImpl) ColumnarSegmentLoader.load(INDEX_DIR.listFiles()[0], ReadMode.mmap);
    final SegmentMetadataImpl heapSegmentMetadata = (SegmentMetadataImpl) heapSegment.getSegmentMetadata();
    final Map<String, ColumnMetadata> metadataByColumn = heapSegmentMetadata.getColumnMetadataMap();
    for (final Map.Entry<String, ColumnMetadata> entry : metadataByColumn.entrySet()) {
        final String columnName = entry.getKey();
        final DataFileReader heapReader = heapSegment.getForwardIndexReaderFor(columnName);
        final DataFileReader mmapReader = mmapSegment.getForwardIndexReaderFor(columnName);
        final ColumnMetadata metadata = entry.getValue();
        if (!metadata.isSingleValue()) {
            final SingleColumnMultiValueReader heapMultiValueReader = (SingleColumnMultiValueReader) heapReader;
            final SingleColumnMultiValueReader mmapMultiValueReader = (SingleColumnMultiValueReader) mmapReader;
            for (int docId = 0; docId < metadata.getTotalDocs(); docId++) {
                // Fresh scratch buffers for each document, one per reader.
                final int[] heapValues = new int[1000];
                final int[] mmapValues = new int[1000];
                Assert.assertEquals(mmapMultiValueReader.getIntArray(docId, mmapValues), heapMultiValueReader.getIntArray(docId, heapValues));
            }
        } else {
            final SingleColumnSingleValueReader heapSingleValueReader = (SingleColumnSingleValueReader) heapReader;
            final SingleColumnSingleValueReader mmapSingleValueReader = (SingleColumnSingleValueReader) mmapReader;
            for (int docId = 0; docId < metadata.getTotalDocs(); docId++) {
                Assert.assertEquals(mmapSingleValueReader.getInt(docId), heapSingleValueReader.getInt(docId));
            }
        }
    }
}
Also used : SingleColumnSingleValueReader(com.linkedin.pinot.core.io.reader.SingleColumnSingleValueReader) ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) IndexSegmentImpl(com.linkedin.pinot.core.segment.index.IndexSegmentImpl) DataFileReader(com.linkedin.pinot.core.io.reader.DataFileReader) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) SingleColumnMultiValueReader(com.linkedin.pinot.core.io.reader.SingleColumnMultiValueReader) Test(org.testng.annotations.Test)

Example 14 with ColumnMetadata

use of com.linkedin.pinot.core.segment.index.ColumnMetadata in project pinot by linkedin.

the class LoadersTest method testPadding.

/**
 * Checks string dictionary padding handling for three segment fixtures: the old format, the new
 * format with {@code %} padding, and the new format with null padding.
 *
 * <p>For each fixture the archive is unpacked into {@code INDEX_DIR}, the padding character
 * recorded for column {@code "name"} is asserted, and a {@link StringDictionary} built from the
 * column's dictionary buffer is checked for raw values, unpadded values and padded lookups.
 *
 * <p>Each segment directory and its reader are opened in try-with-resources so they are closed
 * even when an assertion fails.
 *
 * @throws Exception if unpacking, loading or closing a segment fails
 */
@Test
public void testPadding() throws Exception {
    // Old Format
    TarGzCompressionUtils.unTar(new File(TestUtils.getFileFromResourceUrl(Loaders.class.getClassLoader().getResource(PADDING_OLD))), INDEX_DIR);
    File segmentDirectory = new File(INDEX_DIR, "paddingOld");
    SegmentMetadataImpl originalMetadata = new SegmentMetadataImpl(segmentDirectory);
    Assert.assertEquals(originalMetadata.getColumnMetadataFor("name").getPaddingCharacter(), V1Constants.Str.LEGACY_STRING_PAD_CHAR);
    try (SegmentDirectory segmentDir = SegmentDirectory.createFromLocalFS(segmentDirectory, originalMetadata, ReadMode.heap);
        SegmentDirectory.Reader reader = segmentDir.createReader()) {
        ColumnMetadata columnMetadataFor = originalMetadata.getColumnMetadataFor("name");
        PinotDataBuffer dictionaryBuffer = reader.getIndexFor("name", ColumnIndexType.DICTIONARY);
        StringDictionary dict = new StringDictionary(dictionaryBuffer, columnMetadataFor);
        Assert.assertEquals(dict.getStringValue(0), "lynda 2.0");
        Assert.assertEquals(dict.getStringValue(1), "lynda%%%%");
        Assert.assertEquals(dict.get(0), "lynda 2.0");
        Assert.assertEquals(dict.get(1), "lynda");
        Assert.assertEquals(dict.indexOf("lynda%"), 1);
        Assert.assertEquals(dict.indexOf("lynda%%"), 1);
    }
    // New Format Padding character %
    TarGzCompressionUtils.unTar(new File(TestUtils.getFileFromResourceUrl(Loaders.class.getClassLoader().getResource(PADDING_PERCENT))), INDEX_DIR);
    segmentDirectory = new File(INDEX_DIR, "paddingPercent");
    originalMetadata = new SegmentMetadataImpl(segmentDirectory);
    Assert.assertEquals(originalMetadata.getColumnMetadataFor("name").getPaddingCharacter(), V1Constants.Str.LEGACY_STRING_PAD_CHAR);
    try (SegmentDirectory segmentDir = SegmentDirectory.createFromLocalFS(segmentDirectory, originalMetadata, ReadMode.heap);
        SegmentDirectory.Reader reader = segmentDir.createReader()) {
        ColumnMetadata columnMetadataFor = originalMetadata.getColumnMetadataFor("name");
        PinotDataBuffer dictionaryBuffer = reader.getIndexFor("name", ColumnIndexType.DICTIONARY);
        StringDictionary dict = new StringDictionary(dictionaryBuffer, columnMetadataFor);
        Assert.assertEquals(dict.getStringValue(0), "lynda 2.0");
        Assert.assertEquals(dict.getStringValue(1), "lynda%%%%");
        Assert.assertEquals(dict.get(0), "lynda 2.0");
        Assert.assertEquals(dict.get(1), "lynda");
        Assert.assertEquals(dict.indexOf("lynda%"), 1);
        Assert.assertEquals(dict.indexOf("lynda%%"), 1);
    }
    // New Format Padding character Null
    TarGzCompressionUtils.unTar(new File(TestUtils.getFileFromResourceUrl(Loaders.class.getClassLoader().getResource(PADDING_NULL))), INDEX_DIR);
    segmentDirectory = new File(INDEX_DIR, "paddingNull");
    originalMetadata = new SegmentMetadataImpl(segmentDirectory);
    Assert.assertEquals(originalMetadata.getColumnMetadataFor("name").getPaddingCharacter(), V1Constants.Str.DEFAULT_STRING_PAD_CHAR);
    try (SegmentDirectory segmentDir = SegmentDirectory.createFromLocalFS(segmentDirectory, originalMetadata, ReadMode.heap);
        SegmentDirectory.Reader reader = segmentDir.createReader()) {
        ColumnMetadata columnMetadataFor = originalMetadata.getColumnMetadataFor("name");
        PinotDataBuffer dictionaryBuffer = reader.getIndexFor("name", ColumnIndexType.DICTIONARY);
        StringDictionary dict = new StringDictionary(dictionaryBuffer, columnMetadataFor);
        Assert.assertEquals(dict.getStringValue(0), "lynda\0\0\0\0");
        Assert.assertEquals(dict.getStringValue(1), "lynda 2.0");
        Assert.assertEquals(dict.get(0), "lynda");
        Assert.assertEquals(dict.get(1), "lynda 2.0");
        Assert.assertEquals(dict.indexOf("lynda\0"), 0);
        Assert.assertEquals(dict.indexOf("lynda\0\0"), 0);
    }
}
Also used : ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) PinotDataBuffer(com.linkedin.pinot.core.segment.memory.PinotDataBuffer) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) SegmentDirectory(com.linkedin.pinot.core.segment.store.SegmentDirectory) File(java.io.File) StringDictionary(com.linkedin.pinot.core.segment.index.readers.StringDictionary) Test(org.testng.annotations.Test)

Example 15 with ColumnMetadata

use of com.linkedin.pinot.core.segment.index.ColumnMetadata in project pinot by linkedin.

the class SegmentPreProcessorTest method checkUpdateDefaultColumns.

/**
 * Runs the segment pre-processor against {@code segmentDirectoryFile} with the first new-columns
 * schema, verifies the resulting column metadata and index files, then runs it again with the
 * second schema and verifies the changed default values.
 *
 * @param segmentDirectoryFile segment directory that is processed in place
 * @throws Exception if pre-processing or loading the segment fails
 */
private void checkUpdateDefaultColumns(File segmentDirectoryFile) throws Exception {
    // First pass: apply the first schema.
    new SegmentPreProcessor(segmentDirectoryFile, _indexLoadingConfigMetadata, _newColumnsSchema1).process();
    final SegmentMetadataImpl firstPassMetadata = new SegmentMetadataImpl(segmentDirectoryFile);

    // Every metadata field is checked for the int metric column; the other columns get only
    // the checks that differ per column.
    final ColumnMetadata intMetric = firstPassMetadata.getColumnMetadataFor(NEW_INT_METRIC_COLUMN_NAME);
    Assert.assertEquals(intMetric.getCardinality(), 1);
    Assert.assertEquals(intMetric.getTotalDocs(), 100000);
    Assert.assertEquals(intMetric.getTotalRawDocs(), 100000);
    Assert.assertEquals(intMetric.getTotalAggDocs(), 0);
    Assert.assertEquals(intMetric.getDataType(), FieldSpec.DataType.INT);
    Assert.assertEquals(intMetric.getBitsPerElement(), 1);
    Assert.assertEquals(intMetric.getStringColumnMaxLength(), 0);
    Assert.assertEquals(intMetric.getFieldType(), FieldSpec.FieldType.METRIC);
    Assert.assertTrue(intMetric.isSorted());
    Assert.assertFalse(intMetric.hasNulls());
    Assert.assertTrue(intMetric.hasDictionary());
    Assert.assertTrue(intMetric.hasInvertedIndex());
    Assert.assertTrue(intMetric.isSingleValue());
    Assert.assertEquals(intMetric.getMaxNumberOfMultiValues(), 0);
    Assert.assertEquals(intMetric.getTotalNumberOfEntries(), 100000);
    Assert.assertTrue(intMetric.isAutoGenerated());
    Assert.assertEquals(intMetric.getDefaultNullValueString(), "1");

    final ColumnMetadata longMetric = firstPassMetadata.getColumnMetadataFor(NEW_LONG_METRIC_COLUMN_NAME);
    Assert.assertEquals(longMetric.getDataType(), FieldSpec.DataType.LONG);
    Assert.assertEquals(longMetric.getDefaultNullValueString(), "0");

    final ColumnMetadata floatMetric = firstPassMetadata.getColumnMetadataFor(NEW_FLOAT_METRIC_COLUMN_NAME);
    Assert.assertEquals(floatMetric.getDataType(), FieldSpec.DataType.FLOAT);
    Assert.assertEquals(floatMetric.getDefaultNullValueString(), "0.0");

    final ColumnMetadata doubleMetric = firstPassMetadata.getColumnMetadataFor(NEW_DOUBLE_METRIC_COLUMN_NAME);
    Assert.assertEquals(doubleMetric.getDataType(), FieldSpec.DataType.DOUBLE);
    Assert.assertEquals(doubleMetric.getDefaultNullValueString(), "0.0");

    final ColumnMetadata booleanDimension = firstPassMetadata.getColumnMetadataFor(NEW_BOOLEAN_SV_DIMENSION_COLUMN_NAME);
    Assert.assertEquals(booleanDimension.getDataType(), FieldSpec.DataType.STRING);
    Assert.assertEquals(booleanDimension.getStringColumnMaxLength(), 5);
    Assert.assertEquals(booleanDimension.getFieldType(), FieldSpec.FieldType.DIMENSION);
    Assert.assertEquals(booleanDimension.getDefaultNullValueString(), "false");

    final ColumnMetadata intDimension = firstPassMetadata.getColumnMetadataFor(NEW_INT_SV_DIMENSION_COLUMN_NAME);
    Assert.assertEquals(intDimension.getDataType(), FieldSpec.DataType.INT);
    Assert.assertEquals(intDimension.getDefaultNullValueString(), String.valueOf(Integer.MIN_VALUE));

    final ColumnMetadata stringMvDimension = firstPassMetadata.getColumnMetadataFor(NEW_STRING_MV_DIMENSION_COLUMN_NAME);
    Assert.assertEquals(stringMvDimension.getDataType(), FieldSpec.DataType.STRING);
    Assert.assertEquals(stringMvDimension.getStringColumnMaxLength(), 4);
    Assert.assertFalse(stringMvDimension.isSorted());
    Assert.assertFalse(stringMvDimension.isSingleValue());
    Assert.assertEquals(stringMvDimension.getMaxNumberOfMultiValues(), 1);
    Assert.assertEquals(stringMvDimension.getTotalNumberOfEntries(), 100000);
    Assert.assertEquals(stringMvDimension.getDefaultNullValueString(), "null");

    // Each new column must have both a dictionary and a forward index.
    final String[] newColumns = {
        NEW_INT_METRIC_COLUMN_NAME,
        NEW_LONG_METRIC_COLUMN_NAME,
        NEW_FLOAT_METRIC_COLUMN_NAME,
        NEW_DOUBLE_METRIC_COLUMN_NAME,
        NEW_BOOLEAN_SV_DIMENSION_COLUMN_NAME,
        NEW_INT_SV_DIMENSION_COLUMN_NAME,
        NEW_STRING_MV_DIMENSION_COLUMN_NAME
    };
    try (SegmentDirectory directory = SegmentDirectory.createFromLocalFS(segmentDirectoryFile, firstPassMetadata, ReadMode.mmap);
        SegmentDirectory.Reader indexReader = directory.createReader()) {
        for (String newColumn : newColumns) {
            Assert.assertTrue(indexReader.hasIndexFor(newColumn, ColumnIndexType.DICTIONARY));
            Assert.assertTrue(indexReader.hasIndexFor(newColumn, ColumnIndexType.FORWARD_INDEX));
        }
    }

    // Second pass: the second schema changes the default value of column 'newIntMetric' to 2
    // and gives column 'newStringMVDimension' the default value 'abcd' (same length as 'null').
    new SegmentPreProcessor(segmentDirectoryFile, _indexLoadingConfigMetadata, _newColumnsSchema2).process();
    final SegmentMetadataImpl secondPassMetadata = new SegmentMetadataImpl(segmentDirectoryFile);

    final ColumnMetadata updatedIntMetric = secondPassMetadata.getColumnMetadataFor(NEW_INT_METRIC_COLUMN_NAME);
    Assert.assertEquals(updatedIntMetric.getDefaultNullValueString(), "2");
    final ColumnMetadata updatedStringMvDimension = secondPassMetadata.getColumnMetadataFor(NEW_STRING_MV_DIMENSION_COLUMN_NAME);
    Assert.assertEquals(updatedStringMvDimension.getDefaultNullValueString(), "abcd");
}
Also used : ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) SegmentDirectory(com.linkedin.pinot.core.segment.store.SegmentDirectory)

Aggregations

ColumnMetadata (com.linkedin.pinot.core.segment.index.ColumnMetadata)16 SegmentMetadataImpl (com.linkedin.pinot.core.segment.index.SegmentMetadataImpl)10 PinotDataBuffer (com.linkedin.pinot.core.segment.memory.PinotDataBuffer)5 SegmentDirectory (com.linkedin.pinot.core.segment.store.SegmentDirectory)5 File (java.io.File)5 FieldSpec (com.linkedin.pinot.common.data.FieldSpec)4 Test (org.testng.annotations.Test)4 SingleColumnMultiValueReader (com.linkedin.pinot.core.io.reader.SingleColumnMultiValueReader)3 SingleColumnSingleValueReader (com.linkedin.pinot.core.io.reader.SingleColumnSingleValueReader)3 ImmutableDictionaryReader (com.linkedin.pinot.core.segment.index.readers.ImmutableDictionaryReader)3 HashMap (java.util.HashMap)3 FilterQueryTree (com.linkedin.pinot.common.utils.request.FilterQueryTree)2 IndexSegmentImpl (com.linkedin.pinot.core.segment.index.IndexSegmentImpl)2 ColumnIndexContainer (com.linkedin.pinot.core.segment.index.column.ColumnIndexContainer)2 BitmapInvertedIndexReader (com.linkedin.pinot.core.segment.index.readers.BitmapInvertedIndexReader)2 IntDictionary (com.linkedin.pinot.core.segment.index.readers.IntDictionary)2 StringDictionary (com.linkedin.pinot.core.segment.index.readers.StringDictionary)2 PropertiesConfiguration (org.apache.commons.configuration.PropertiesConfiguration)2 ImmutableRoaringBitmap (org.roaringbitmap.buffer.ImmutableRoaringBitmap)2 DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec)1