Search in sources:

Example 1 with StringDictionary

use of com.linkedin.pinot.core.segment.index.readers.StringDictionary in project pinot by linkedin.

In class ColumnMinMaxValueGenerator, method addColumnMinMaxValueForColumn:

/**
 * Records the min and max values of a dictionary-encoded column in the segment properties.
 *
 * <p>Returns without doing anything when the column has no dictionary or when its metadata
 * already reports a min value. Otherwise the first and last dictionary entries are read and
 * handed to {@code SegmentColumnarIndexCreator.addColumnMinMaxValueInfo}, and
 * {@code _minMaxValueAdded} is set.
 *
 * <p>NOTE(review): using entry 0 and the last entry as min/max presumably relies on the
 * dictionary being stored in sorted order; confirm against the dictionary writer.
 *
 * @param columnName name of the column to process
 * @throws IllegalStateException if the column's data type is not INT, LONG, FLOAT, DOUBLE or STRING
 * @throws Exception declared for failures raised by the index and dictionary calls made here
 */
private void addColumnMinMaxValueForColumn(String columnName) throws Exception {
    ColumnMetadata metadata = _segmentMetadata.getColumnMetadataFor(columnName);

    // Nothing to do for a column that has no dictionary.
    if (!metadata.hasDictionary()) {
        return;
    }
    // Nothing to do when a min value is already present in the metadata.
    if (metadata.getMinValue() != null) {
        return;
    }

    PinotDataBuffer buffer = _segmentWriter.getIndexFor(columnName, ColumnIndexType.DICTIONARY);
    FieldSpec.DataType type = metadata.getDataType();

    // Each arm reads the first entry, then the last entry, of the type-specific dictionary.
    String minValue;
    String maxValue;
    switch (type) {
        case INT: {
            IntDictionary dictionary = new IntDictionary(buffer, metadata);
            minValue = dictionary.getStringValue(0);
            maxValue = dictionary.getStringValue(dictionary.length() - 1);
            break;
        }
        case LONG: {
            LongDictionary dictionary = new LongDictionary(buffer, metadata);
            minValue = dictionary.getStringValue(0);
            maxValue = dictionary.getStringValue(dictionary.length() - 1);
            break;
        }
        case FLOAT: {
            FloatDictionary dictionary = new FloatDictionary(buffer, metadata);
            minValue = dictionary.getStringValue(0);
            maxValue = dictionary.getStringValue(dictionary.length() - 1);
            break;
        }
        case DOUBLE: {
            DoubleDictionary dictionary = new DoubleDictionary(buffer, metadata);
            minValue = dictionary.getStringValue(0);
            maxValue = dictionary.getStringValue(dictionary.length() - 1);
            break;
        }
        case STRING: {
            // Strings are read through get() rather than getStringValue().
            StringDictionary dictionary = new StringDictionary(buffer, metadata);
            minValue = dictionary.get(0);
            maxValue = dictionary.get(dictionary.length() - 1);
            break;
        }
        default:
            throw new IllegalStateException("Unsupported data type: " + type + " for column: " + columnName);
    }

    SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, columnName, minValue, maxValue);
    _minMaxValueAdded = true;
}
Also used : ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) LongDictionary(com.linkedin.pinot.core.segment.index.readers.LongDictionary) FloatDictionary(com.linkedin.pinot.core.segment.index.readers.FloatDictionary) PinotDataBuffer(com.linkedin.pinot.core.segment.memory.PinotDataBuffer) DoubleDictionary(com.linkedin.pinot.core.segment.index.readers.DoubleDictionary) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) IntDictionary(com.linkedin.pinot.core.segment.index.readers.IntDictionary) StringDictionary(com.linkedin.pinot.core.segment.index.readers.StringDictionary)

Example 2 with StringDictionary

use of com.linkedin.pinot.core.segment.index.readers.StringDictionary in project pinot by linkedin.

In class DictionariesTest, method test1:

/**
 * Loads the same segment in heap and mmap read modes and checks, for every column in the
 * mmap segment's column metadata map, that both dictionaries have the reader type matching
 * the column's data type, the same length, and equal values at every dictionary id.
 *
 * <p>BOOLEAN and STRING columns are both expected to use a StringDictionary. Data types
 * without a case below get no reader-type assertion.
 */
@Test
public void test1() throws Exception {
    final IndexSegmentImpl heapSegment = (IndexSegmentImpl) ColumnarSegmentLoader.load(segmentDirectory, ReadMode.heap);
    final IndexSegmentImpl mmapSegment = (IndexSegmentImpl) ColumnarSegmentLoader.load(segmentDirectory, ReadMode.mmap);
    final SegmentMetadataImpl mmapMetadata = (SegmentMetadataImpl) mmapSegment.getSegmentMetadata();

    for (final String column : mmapMetadata.getColumnMetadataMap().keySet()) {
        final ImmutableDictionaryReader heapDictionary = heapSegment.getDictionaryFor(column);
        final ImmutableDictionaryReader mmapDictionary = mmapSegment.getDictionaryFor(column);

        // Map the column's data type to the dictionary reader class it should use.
        final Class<?> expectedType;
        switch (mmapMetadata.getColumnMetadataMap().get(column).getDataType()) {
            case BOOLEAN:
            case STRING:
                expectedType = StringDictionary.class;
                break;
            case DOUBLE:
                expectedType = DoubleDictionary.class;
                break;
            case FLOAT:
                expectedType = FloatDictionary.class;
                break;
            case LONG:
                expectedType = LongDictionary.class;
                break;
            case INT:
                expectedType = IntDictionary.class;
                break;
            default:
                expectedType = null;
                break;
        }
        if (expectedType != null) {
            Assert.assertTrue(expectedType.isInstance(heapDictionary));
            Assert.assertTrue(expectedType.isInstance(mmapDictionary));
        }

        // Both read modes must expose identical dictionary contents.
        final int heapLength = heapDictionary.length();
        Assert.assertEquals(mmapDictionary.length(), heapLength);
        for (int dictId = 0; dictId < heapLength; dictId++) {
            Assert.assertEquals(mmapDictionary.get(dictId), heapDictionary.get(dictId));
        }
    }
}
Also used : IndexSegmentImpl(com.linkedin.pinot.core.segment.index.IndexSegmentImpl) FloatDictionary(com.linkedin.pinot.core.segment.index.readers.FloatDictionary) LongDictionary(com.linkedin.pinot.core.segment.index.readers.LongDictionary) ImmutableDictionaryReader(com.linkedin.pinot.core.segment.index.readers.ImmutableDictionaryReader) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) DoubleDictionary(com.linkedin.pinot.core.segment.index.readers.DoubleDictionary) StringDictionary(com.linkedin.pinot.core.segment.index.readers.StringDictionary) IntDictionary(com.linkedin.pinot.core.segment.index.readers.IntDictionary) Test(org.testng.annotations.Test)

Example 3 with StringDictionary

use of com.linkedin.pinot.core.segment.index.readers.StringDictionary in project pinot by linkedin.

In class LoadersTest, method testPadding:

/**
 * Checks the "name" column's string dictionary for three packaged segments: the old format,
 * the new format padded with '%', and the new format padded with the null character.
 *
 * <p>For each segment the test verifies the padding character reported by the column
 * metadata, the padded values returned by getStringValue, the unpadded values returned by
 * get, and that indexOf resolves lookups that carry trailing padding characters.
 */
@Test
public void testPadding() throws Exception {
    // Old format: metadata reports the legacy pad character.
    File paddingOldDir = extractSegment(PADDING_OLD, "paddingOld");
    SegmentMetadataImpl paddingOldMetadata = new SegmentMetadataImpl(paddingOldDir);
    Assert.assertEquals(paddingOldMetadata.getColumnMetadataFor("name").getPaddingCharacter(), V1Constants.Str.LEGACY_STRING_PAD_CHAR);
    assertPercentPaddedDictionary(openNameDictionary(paddingOldDir, paddingOldMetadata));

    // New format with '%' padding: same pad character and same expectations as the old format.
    File paddingPercentDir = extractSegment(PADDING_PERCENT, "paddingPercent");
    SegmentMetadataImpl paddingPercentMetadata = new SegmentMetadataImpl(paddingPercentDir);
    Assert.assertEquals(paddingPercentMetadata.getColumnMetadataFor("name").getPaddingCharacter(), V1Constants.Str.LEGACY_STRING_PAD_CHAR);
    assertPercentPaddedDictionary(openNameDictionary(paddingPercentDir, paddingPercentMetadata));

    // New format with null-character padding: "lynda" is expected at id 0, "lynda 2.0" at id 1.
    File paddingNullDir = extractSegment(PADDING_NULL, "paddingNull");
    SegmentMetadataImpl paddingNullMetadata = new SegmentMetadataImpl(paddingNullDir);
    Assert.assertEquals(paddingNullMetadata.getColumnMetadataFor("name").getPaddingCharacter(), V1Constants.Str.DEFAULT_STRING_PAD_CHAR);
    StringDictionary nullPadded = openNameDictionary(paddingNullDir, paddingNullMetadata);
    Assert.assertEquals(nullPadded.getStringValue(0), "lynda\0\0\0\0");
    Assert.assertEquals(nullPadded.getStringValue(1), "lynda 2.0");
    Assert.assertEquals(nullPadded.get(0), "lynda");
    Assert.assertEquals(nullPadded.get(1), "lynda 2.0");
    Assert.assertEquals(nullPadded.indexOf("lynda\0"), 0);
    Assert.assertEquals(nullPadded.indexOf("lynda\0\0"), 0);
}

// Untars the named test resource into INDEX_DIR and returns the segment directory inside it.
private File extractSegment(String resourceName, String segmentDirName) throws Exception {
    TarGzCompressionUtils.unTar(new File(TestUtils.getFileFromResourceUrl(Loaders.class.getClassLoader().getResource(resourceName))), INDEX_DIR);
    return new File(INDEX_DIR, segmentDirName);
}

// Opens the segment in heap mode and builds the string dictionary of its "name" column.
private StringDictionary openNameDictionary(File segmentDirectory, SegmentMetadataImpl metadata) throws Exception {
    SegmentDirectory segmentDir = SegmentDirectory.createFromLocalFS(segmentDirectory, metadata, ReadMode.heap);
    ColumnMetadata nameColumnMetadata = metadata.getColumnMetadataFor("name");
    SegmentDirectory.Reader reader = segmentDir.createReader();
    PinotDataBuffer dictionaryBuffer = reader.getIndexFor("name", ColumnIndexType.DICTIONARY);
    return new StringDictionary(dictionaryBuffer, nameColumnMetadata);
}

// Shared expectations for the two segments whose "name" values are padded with '%'.
private void assertPercentPaddedDictionary(StringDictionary dict) {
    Assert.assertEquals(dict.getStringValue(0), "lynda 2.0");
    Assert.assertEquals(dict.getStringValue(1), "lynda%%%%");
    Assert.assertEquals(dict.get(0), "lynda 2.0");
    Assert.assertEquals(dict.get(1), "lynda");
    Assert.assertEquals(dict.indexOf("lynda%"), 1);
    Assert.assertEquals(dict.indexOf("lynda%%"), 1);
}
Also used : ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) PinotDataBuffer(com.linkedin.pinot.core.segment.memory.PinotDataBuffer) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) SegmentDirectory(com.linkedin.pinot.core.segment.store.SegmentDirectory) File(java.io.File) StringDictionary(com.linkedin.pinot.core.segment.index.readers.StringDictionary) Test(org.testng.annotations.Test)

Aggregations

StringDictionary (com.linkedin.pinot.core.segment.index.readers.StringDictionary): 3; ColumnMetadata (com.linkedin.pinot.core.segment.index.ColumnMetadata): 2; SegmentMetadataImpl (com.linkedin.pinot.core.segment.index.SegmentMetadataImpl): 2; DoubleDictionary (com.linkedin.pinot.core.segment.index.readers.DoubleDictionary): 2; FloatDictionary (com.linkedin.pinot.core.segment.index.readers.FloatDictionary): 2; IntDictionary (com.linkedin.pinot.core.segment.index.readers.IntDictionary): 2; LongDictionary (com.linkedin.pinot.core.segment.index.readers.LongDictionary): 2; PinotDataBuffer (com.linkedin.pinot.core.segment.memory.PinotDataBuffer): 2; Test (org.testng.annotations.Test): 2; FieldSpec (com.linkedin.pinot.common.data.FieldSpec): 1; IndexSegmentImpl (com.linkedin.pinot.core.segment.index.IndexSegmentImpl): 1; ImmutableDictionaryReader (com.linkedin.pinot.core.segment.index.readers.ImmutableDictionaryReader): 1; SegmentDirectory (com.linkedin.pinot.core.segment.store.SegmentDirectory): 1; File (java.io.File): 1