Use of com.linkedin.pinot.core.segment.index.readers.StringDictionary in project pinot by linkedin.
Class ColumnMinMaxValueGenerator, method addColumnMinMaxValueForColumn:
/**
 * Records min/max value info for one column in the segment properties.
 *
 * <p>The column is skipped when its metadata reports no dictionary, or when the metadata
 * already has a non-null min value. Otherwise the column's dictionary is opened from the
 * segment writer and its first entry (index 0) and last entry (index {@code length() - 1})
 * are passed to {@code SegmentColumnarIndexCreator.addColumnMinMaxValueInfo} as the min and
 * max values. NOTE(review): this presumably relies on the dictionary being stored in sorted
 * order -- confirm against the dictionary creator.
 *
 * @param columnName name of the column to process
 * @throws IllegalStateException if the column's data type is not INT, LONG, FLOAT, DOUBLE or STRING
 * @throws Exception propagated from the metadata/index/dictionary calls
 */
private void addColumnMinMaxValueForColumn(String columnName) throws Exception {
  ColumnMetadata columnMetadata = _segmentMetadata.getColumnMetadataFor(columnName);

  // No dictionary: nothing to read the values from.
  if (!columnMetadata.hasDictionary()) {
    return;
  }
  // Min value already present in the metadata: leave the column untouched.
  if (columnMetadata.getMinValue() != null) {
    return;
  }

  PinotDataBuffer dictionaryBuffer = _segmentWriter.getIndexFor(columnName, ColumnIndexType.DICTIONARY);
  FieldSpec.DataType dataType = columnMetadata.getDataType();

  switch (dataType) {
    case INT: {
      IntDictionary dictionary = new IntDictionary(dictionaryBuffer, columnMetadata);
      int lastDictId = dictionary.length() - 1;
      SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, columnName,
          dictionary.getStringValue(0), dictionary.getStringValue(lastDictId));
      break;
    }
    case LONG: {
      LongDictionary dictionary = new LongDictionary(dictionaryBuffer, columnMetadata);
      int lastDictId = dictionary.length() - 1;
      SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, columnName,
          dictionary.getStringValue(0), dictionary.getStringValue(lastDictId));
      break;
    }
    case FLOAT: {
      FloatDictionary dictionary = new FloatDictionary(dictionaryBuffer, columnMetadata);
      int lastDictId = dictionary.length() - 1;
      SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, columnName,
          dictionary.getStringValue(0), dictionary.getStringValue(lastDictId));
      break;
    }
    case DOUBLE: {
      DoubleDictionary dictionary = new DoubleDictionary(dictionaryBuffer, columnMetadata);
      int lastDictId = dictionary.length() - 1;
      SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, columnName,
          dictionary.getStringValue(0), dictionary.getStringValue(lastDictId));
      break;
    }
    case STRING: {
      // The string case uses get(...) rather than getStringValue(...), as in the original code.
      StringDictionary dictionary = new StringDictionary(dictionaryBuffer, columnMetadata);
      int lastDictId = dictionary.length() - 1;
      SegmentColumnarIndexCreator.addColumnMinMaxValueInfo(_segmentProperties, columnName,
          dictionary.get(0), dictionary.get(lastDictId));
      break;
    }
    default:
      throw new IllegalStateException("Unsupported data type: " + dataType + " for column: " + columnName);
  }

  // Flag that at least one column had min/max info added.
  _minMaxValueAdded = true;
}
Use of com.linkedin.pinot.core.segment.index.readers.StringDictionary in project pinot by linkedin.
Class DictionariesTest, method test1:
/**
 * Loads the same segment directory once in heap mode and once in mmap mode and, for every
 * column listed in the mmap segment's column metadata map, checks that:
 * <ul>
 *   <li>both dictionaries are of the reader class expected for the column's data type
 *       (BOOLEAN and STRING both expect {@code StringDictionary}; other data types are
 *       not type-checked),</li>
 *   <li>both dictionaries report the same length, and</li>
 *   <li>both dictionaries return equal values for every index.</li>
 * </ul>
 */
@Test
public void test1() throws Exception {
  final IndexSegmentImpl heapSegment = (IndexSegmentImpl) ColumnarSegmentLoader.load(segmentDirectory, ReadMode.heap);
  final IndexSegmentImpl mmapSegment = (IndexSegmentImpl) ColumnarSegmentLoader.load(segmentDirectory, ReadMode.mmap);
  final SegmentMetadataImpl mmapMetadata = (SegmentMetadataImpl) mmapSegment.getSegmentMetadata();

  for (final String column : mmapMetadata.getColumnMetadataMap().keySet()) {
    final ImmutableDictionaryReader heapDictionary = heapSegment.getDictionaryFor(column);
    final ImmutableDictionaryReader mmapDictionary = mmapSegment.getDictionaryFor(column);

    // The reader implementation must match the column's data type in both read modes.
    switch (mmapMetadata.getColumnMetadataMap().get(column).getDataType()) {
      case INT:
        Assert.assertTrue(heapDictionary instanceof IntDictionary);
        Assert.assertTrue(mmapDictionary instanceof IntDictionary);
        break;
      case LONG:
        Assert.assertTrue(heapDictionary instanceof LongDictionary);
        Assert.assertTrue(mmapDictionary instanceof LongDictionary);
        break;
      case FLOAT:
        Assert.assertTrue(heapDictionary instanceof FloatDictionary);
        Assert.assertTrue(mmapDictionary instanceof FloatDictionary);
        break;
      case DOUBLE:
        Assert.assertTrue(heapDictionary instanceof DoubleDictionary);
        Assert.assertTrue(mmapDictionary instanceof DoubleDictionary);
        break;
      case BOOLEAN:
      case STRING:
        Assert.assertTrue(heapDictionary instanceof StringDictionary);
        Assert.assertTrue(mmapDictionary instanceof StringDictionary);
        break;
    }

    // Both read modes must expose the same entries in the same order.
    final int numValues = heapDictionary.length();
    Assert.assertEquals(mmapDictionary.length(), numValues);
    for (int dictId = 0; dictId < numValues; dictId++) {
      Assert.assertEquals(mmapDictionary.get(dictId), heapDictionary.get(dictId));
    }
  }
}
Use of com.linkedin.pinot.core.segment.index.readers.StringDictionary in project pinot by linkedin.
Class LoadersTest, method testPadding:
/**
 * Checks string-dictionary padding handling for three packaged test segments: the old
 * format, the new format with '%' padding, and the new format with null-character padding.
 *
 * <p>For each segment the test asserts the padding character reported by the "name" column's
 * metadata, then opens the "name" dictionary and asserts that {@code getStringValue} returns
 * the padded entry, {@code get} returns the entry without padding, and {@code indexOf}
 * resolves inputs carrying one or two trailing pad characters to the same index.
 */
@Test
public void testPadding() throws Exception {
  // Old Format
  File segmentDirectory = untarPaddingSegment(PADDING_OLD, "paddingOld");
  SegmentMetadataImpl originalMetadata = new SegmentMetadataImpl(segmentDirectory);
  Assert.assertEquals(originalMetadata.getColumnMetadataFor("name").getPaddingCharacter(), V1Constants.Str.LEGACY_STRING_PAD_CHAR);
  StringDictionary dict = openNameDictionary(segmentDirectory, originalMetadata);
  Assert.assertEquals(dict.getStringValue(0), "lynda 2.0");
  Assert.assertEquals(dict.getStringValue(1), "lynda%%%%");
  Assert.assertEquals(dict.get(0), "lynda 2.0");
  Assert.assertEquals(dict.get(1), "lynda");
  Assert.assertEquals(dict.indexOf("lynda%"), 1);
  Assert.assertEquals(dict.indexOf("lynda%%"), 1);

  // New Format Padding character %
  segmentDirectory = untarPaddingSegment(PADDING_PERCENT, "paddingPercent");
  originalMetadata = new SegmentMetadataImpl(segmentDirectory);
  Assert.assertEquals(originalMetadata.getColumnMetadataFor("name").getPaddingCharacter(), V1Constants.Str.LEGACY_STRING_PAD_CHAR);
  dict = openNameDictionary(segmentDirectory, originalMetadata);
  Assert.assertEquals(dict.getStringValue(0), "lynda 2.0");
  Assert.assertEquals(dict.getStringValue(1), "lynda%%%%");
  Assert.assertEquals(dict.get(0), "lynda 2.0");
  Assert.assertEquals(dict.get(1), "lynda");
  Assert.assertEquals(dict.indexOf("lynda%"), 1);
  Assert.assertEquals(dict.indexOf("lynda%%"), 1);

  // New Format Padding character Null
  segmentDirectory = untarPaddingSegment(PADDING_NULL, "paddingNull");
  originalMetadata = new SegmentMetadataImpl(segmentDirectory);
  Assert.assertEquals(originalMetadata.getColumnMetadataFor("name").getPaddingCharacter(), V1Constants.Str.DEFAULT_STRING_PAD_CHAR);
  dict = openNameDictionary(segmentDirectory, originalMetadata);
  // With null padding the padded entry is at index 0 and "lynda 2.0" at index 1.
  Assert.assertEquals(dict.getStringValue(0), "lynda\0\0\0\0");
  Assert.assertEquals(dict.getStringValue(1), "lynda 2.0");
  Assert.assertEquals(dict.get(0), "lynda");
  Assert.assertEquals(dict.get(1), "lynda 2.0");
  Assert.assertEquals(dict.indexOf("lynda\0"), 0);
  Assert.assertEquals(dict.indexOf("lynda\0\0"), 0);
}

/**
 * Untars the given classpath resource into {@code INDEX_DIR} and returns the
 * {@code File} for {@code segmentName} under {@code INDEX_DIR}.
 *
 * @param resourceName classpath resource name of the tar.gz test segment
 * @param segmentName name of the segment directory expected inside {@code INDEX_DIR}
 * @return the segment directory path (not checked for existence here)
 * @throws Exception propagated from resource lookup or untarring
 */
private File untarPaddingSegment(String resourceName, String segmentName) throws Exception {
  TarGzCompressionUtils.unTar(new File(TestUtils.getFileFromResourceUrl(Loaders.class.getClassLoader().getResource(resourceName))), INDEX_DIR);
  return new File(INDEX_DIR, segmentName);
}

/**
 * Opens the dictionary of the "name" column of the given segment in heap read mode.
 *
 * @param segmentDirectory directory of the untarred segment
 * @param segmentMetadata metadata already loaded from {@code segmentDirectory}
 * @return a {@code StringDictionary} built from the column's dictionary buffer and metadata
 * @throws Exception propagated from the segment directory / reader calls
 */
private StringDictionary openNameDictionary(File segmentDirectory, SegmentMetadataImpl segmentMetadata) throws Exception {
  SegmentDirectory segmentDir = SegmentDirectory.createFromLocalFS(segmentDirectory, segmentMetadata, ReadMode.heap);
  ColumnMetadata nameColumnMetadata = segmentMetadata.getColumnMetadataFor("name");
  SegmentDirectory.Reader reader = segmentDir.createReader();
  PinotDataBuffer dictionaryBuffer = reader.getIndexFor("name", ColumnIndexType.DICTIONARY);
  return new StringDictionary(dictionaryBuffer, nameColumnMetadata);
}
Aggregations