use of com.linkedin.pinot.core.segment.index.ColumnMetadata in project pinot by linkedin.
the class ForwardIndexReaderBenchmark method benchmarkForwardIndex.
/**
 * Runs the forward-index read benchmarks for the columns of the segment stored in {@code indexDir}.
 *
 * <p>Multi-valued columns are passed to {@code multiValuedReadBenchMark}; single-valued columns that
 * are not sorted are passed to {@code singleValuedReadBenchMark}. Sorted single-valued columns are
 * skipped.
 *
 * @param indexDir path of the segment directory holding the metadata and forward index files
 * @param includeColumns columns to benchmark; when {@code null} or empty every column is considered
 * @throws Exception propagated from loading the segment metadata or from the benchmark helpers
 */
private static void benchmarkForwardIndex(String indexDir, List<String> includeColumns) throws Exception {
  SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(new File(indexDir));
  String segmentVersion = segmentMetadata.getVersion();
  boolean filterColumns = includeColumns != null && !includeColumns.isEmpty();
  for (String column : segmentMetadata.getAllColumns()) {
    if (filterColumns && !includeColumns.contains(column)) {
      continue;
    }
    ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column);
    boolean singleValue = columnMetadata.isSingleValue();
    // Only sorted single-valued columns are skipped. Skipping every single-valued column here
    // would make the single-valued benchmark below unreachable.
    if (singleValue && columnMetadata.isSorted()) {
      continue;
    }
    String fwdIndexFileName = segmentMetadata.getForwardIndexFileName(column, segmentVersion);
    File fwdIndexFile = new File(indexDir, fwdIndexFileName);
    if (singleValue) {
      singleValuedReadBenchMark(segmentVersion, fwdIndexFile, segmentMetadata.getTotalDocs(),
          columnMetadata.getBitsPerElement());
    } else {
      multiValuedReadBenchMark(segmentVersion, fwdIndexFile, segmentMetadata.getTotalDocs(),
          columnMetadata.getTotalNumberOfEntries(), columnMetadata.getMaxNumberOfMultiValues(),
          columnMetadata.getBitsPerElement());
    }
  }
}
use of com.linkedin.pinot.core.segment.index.ColumnMetadata in project pinot by linkedin.
the class PinotSegmentRecordReader method getSchema.
/**
 * Builds a {@link Schema} named after the segment and adds one field spec per entry in
 * {@code columns}, derived from that column's metadata.
 *
 * <p>DIMENSION columns become a {@code DimensionFieldSpec}, METRIC columns a {@code MetricFieldSpec}
 * and TIME columns a {@code TimeFieldSpec} wrapping a {@code TimeGranularitySpec}.
 *
 * @return the schema assembled from the segment's column metadata
 */
@Override
public Schema getSchema() {
  final Schema result = new Schema();
  result.setSchemaName(segmentMetadata.getName());
  for (String column : columns) {
    final ColumnMetadata metadata = segmentMetadata.getColumnMetadataFor(column);
    final String name = metadata.getColumnName();
    final DataType type = metadata.getDataType();
    FieldSpec spec = null;
    switch (metadata.getFieldType()) {
      case DIMENSION:
        spec = new DimensionFieldSpec(name, type, metadata.isSingleValue());
        break;
      case METRIC:
        spec = new MetricFieldSpec(name, type);
        break;
      case TIME:
        spec = new TimeFieldSpec(new TimeGranularitySpec(type, metadata.getTimeUnit(), name));
        break;
      default:
        // Any other field type leaves spec as null; it is still handed to addField below.
        break;
    }
    result.addField(spec);
  }
  return result;
}
use of com.linkedin.pinot.core.segment.index.ColumnMetadata in project pinot by linkedin.
the class IntArraysTest method test1.
/**
 * Loads the first segment found under {@code INDEX_DIR} once in heap mode and once in mmap mode and
 * checks, for every column and every document, that both forward-index readers agree.
 *
 * <p>Single-valued columns are compared through {@code getInt}; multi-valued columns are compared
 * through {@code getIntArray}, checking both the returned count and the values read.
 */
@Test
public void test1() throws Exception {
  final IndexSegmentImpl heapSegment =
      (IndexSegmentImpl) ColumnarSegmentLoader.load(INDEX_DIR.listFiles()[0], ReadMode.heap);
  final IndexSegmentImpl mmapSegment =
      (IndexSegmentImpl) ColumnarSegmentLoader.load(INDEX_DIR.listFiles()[0], ReadMode.mmap);
  final Map<String, ColumnMetadata> metadataMap =
      ((SegmentMetadataImpl) heapSegment.getSegmentMetadata()).getColumnMetadataMap();
  for (final String column : metadataMap.keySet()) {
    final ColumnMetadata columnMetadata = metadataMap.get(column);
    final DataFileReader heapArray = heapSegment.getForwardIndexReaderFor(column);
    final DataFileReader mmapArray = mmapSegment.getForwardIndexReaderFor(column);
    if (columnMetadata.isSingleValue()) {
      final SingleColumnSingleValueReader heapReader = (SingleColumnSingleValueReader) heapArray;
      final SingleColumnSingleValueReader mmapReader = (SingleColumnSingleValueReader) mmapArray;
      for (int docId = 0; docId < columnMetadata.getTotalDocs(); docId++) {
        Assert.assertEquals(mmapReader.getInt(docId), heapReader.getInt(docId));
      }
    } else {
      final SingleColumnMultiValueReader heapReader = (SingleColumnMultiValueReader) heapArray;
      final SingleColumnMultiValueReader mmapReader = (SingleColumnMultiValueReader) mmapArray;
      // The buffers are reused across documents; only the prefix filled by the current read is compared.
      final int[] heapValues = new int[1000];
      final int[] mmapValues = new int[1000];
      for (int docId = 0; docId < columnMetadata.getTotalDocs(); docId++) {
        final int mmapLength = mmapReader.getIntArray(docId, mmapValues);
        final int heapLength = heapReader.getIntArray(docId, heapValues);
        Assert.assertEquals(mmapLength, heapLength);
        // NOTE(review): assumes getIntArray returns the number of values written into the buffer - confirm.
        for (int k = 0; k < heapLength; k++) {
          Assert.assertEquals(mmapValues[k], heapValues[k]);
        }
      }
    }
  }
}
use of com.linkedin.pinot.core.segment.index.ColumnMetadata in project pinot by linkedin.
the class LoadersTest method testPadding.
/**
 * Checks string-dictionary padding handling for three packaged segments: the old format, the new
 * format padded with '%', and the new format padded with the null character.
 *
 * <p>For each segment the test untars the resource into {@code INDEX_DIR}, verifies the padding
 * character recorded in the "name" column metadata, and then verifies the raw (padded) values, the
 * unpadded values and the index lookups of the "name" dictionary. The segment directory and its
 * reader are closed once the section's assertions have run.
 */
@Test
public void testPadding() throws Exception {
  // Old Format
  TarGzCompressionUtils.unTar(
      new File(TestUtils.getFileFromResourceUrl(Loaders.class.getClassLoader().getResource(PADDING_OLD))),
      INDEX_DIR);
  File segmentDirectory = new File(INDEX_DIR, "paddingOld");
  SegmentMetadataImpl originalMetadata = new SegmentMetadataImpl(segmentDirectory);
  ColumnMetadata columnMetadataFor = originalMetadata.getColumnMetadataFor("name");
  Assert.assertEquals(columnMetadataFor.getPaddingCharacter(), V1Constants.Str.LEGACY_STRING_PAD_CHAR);
  try (SegmentDirectory segmentDir =
          SegmentDirectory.createFromLocalFS(segmentDirectory, originalMetadata, ReadMode.heap);
      SegmentDirectory.Reader reader = segmentDir.createReader()) {
    PinotDataBuffer dictionaryBuffer = reader.getIndexFor("name", ColumnIndexType.DICTIONARY);
    StringDictionary dict = new StringDictionary(dictionaryBuffer, columnMetadataFor);
    Assert.assertEquals(dict.getStringValue(0), "lynda 2.0");
    Assert.assertEquals(dict.getStringValue(1), "lynda%%%%");
    Assert.assertEquals(dict.get(0), "lynda 2.0");
    Assert.assertEquals(dict.get(1), "lynda");
    Assert.assertEquals(dict.indexOf("lynda%"), 1);
    Assert.assertEquals(dict.indexOf("lynda%%"), 1);
  }

  // New Format Padding character %
  TarGzCompressionUtils.unTar(
      new File(TestUtils.getFileFromResourceUrl(Loaders.class.getClassLoader().getResource(PADDING_PERCENT))),
      INDEX_DIR);
  segmentDirectory = new File(INDEX_DIR, "paddingPercent");
  originalMetadata = new SegmentMetadataImpl(segmentDirectory);
  columnMetadataFor = originalMetadata.getColumnMetadataFor("name");
  Assert.assertEquals(columnMetadataFor.getPaddingCharacter(), V1Constants.Str.LEGACY_STRING_PAD_CHAR);
  try (SegmentDirectory segmentDir =
          SegmentDirectory.createFromLocalFS(segmentDirectory, originalMetadata, ReadMode.heap);
      SegmentDirectory.Reader reader = segmentDir.createReader()) {
    PinotDataBuffer dictionaryBuffer = reader.getIndexFor("name", ColumnIndexType.DICTIONARY);
    StringDictionary dict = new StringDictionary(dictionaryBuffer, columnMetadataFor);
    Assert.assertEquals(dict.getStringValue(0), "lynda 2.0");
    Assert.assertEquals(dict.getStringValue(1), "lynda%%%%");
    Assert.assertEquals(dict.get(0), "lynda 2.0");
    Assert.assertEquals(dict.get(1), "lynda");
    Assert.assertEquals(dict.indexOf("lynda%"), 1);
    Assert.assertEquals(dict.indexOf("lynda%%"), 1);
  }

  // New Format Padding character Null
  TarGzCompressionUtils.unTar(
      new File(TestUtils.getFileFromResourceUrl(Loaders.class.getClassLoader().getResource(PADDING_NULL))),
      INDEX_DIR);
  segmentDirectory = new File(INDEX_DIR, "paddingNull");
  originalMetadata = new SegmentMetadataImpl(segmentDirectory);
  columnMetadataFor = originalMetadata.getColumnMetadataFor("name");
  Assert.assertEquals(columnMetadataFor.getPaddingCharacter(), V1Constants.Str.DEFAULT_STRING_PAD_CHAR);
  try (SegmentDirectory segmentDir =
          SegmentDirectory.createFromLocalFS(segmentDirectory, originalMetadata, ReadMode.heap);
      SegmentDirectory.Reader reader = segmentDir.createReader()) {
    PinotDataBuffer dictionaryBuffer = reader.getIndexFor("name", ColumnIndexType.DICTIONARY);
    StringDictionary dict = new StringDictionary(dictionaryBuffer, columnMetadataFor);
    Assert.assertEquals(dict.getStringValue(0), "lynda\0\0\0\0");
    Assert.assertEquals(dict.getStringValue(1), "lynda 2.0");
    Assert.assertEquals(dict.get(0), "lynda");
    Assert.assertEquals(dict.get(1), "lynda 2.0");
    Assert.assertEquals(dict.indexOf("lynda\0"), 0);
    Assert.assertEquals(dict.indexOf("lynda\0\0"), 0);
  }
}
use of com.linkedin.pinot.core.segment.index.ColumnMetadata in project pinot by linkedin.
the class SegmentPreProcessorTest method checkUpdateDefaultColumns.
/**
 * Runs the segment pre-processor with the first new-columns schema, verifies the metadata and the
 * presence of dictionary and forward index for every newly added column, then runs it again with
 * the second schema and verifies the changed default values.
 *
 * @param segmentDirectoryFile directory of the segment to pre-process and inspect
 * @throws Exception propagated from the pre-processor, metadata loading or segment directory access
 */
private void checkUpdateDefaultColumns(File segmentDirectoryFile) throws Exception {
  // First pass: add the default columns described by the first schema.
  SegmentPreProcessor firstPass =
      new SegmentPreProcessor(segmentDirectoryFile, _indexLoadingConfigMetadata, _newColumnsSchema1);
  firstPass.process();
  SegmentMetadataImpl firstMetadata = new SegmentMetadataImpl(segmentDirectoryFile);

  // Every metadata field is verified for the int metric; the other columns get targeted checks.
  ColumnMetadata intMetric = firstMetadata.getColumnMetadataFor(NEW_INT_METRIC_COLUMN_NAME);
  Assert.assertEquals(intMetric.getCardinality(), 1);
  Assert.assertEquals(intMetric.getTotalDocs(), 100000);
  Assert.assertEquals(intMetric.getTotalRawDocs(), 100000);
  Assert.assertEquals(intMetric.getTotalAggDocs(), 0);
  Assert.assertEquals(intMetric.getDataType(), FieldSpec.DataType.INT);
  Assert.assertEquals(intMetric.getBitsPerElement(), 1);
  Assert.assertEquals(intMetric.getStringColumnMaxLength(), 0);
  Assert.assertEquals(intMetric.getFieldType(), FieldSpec.FieldType.METRIC);
  Assert.assertTrue(intMetric.isSorted());
  Assert.assertFalse(intMetric.hasNulls());
  Assert.assertTrue(intMetric.hasDictionary());
  Assert.assertTrue(intMetric.hasInvertedIndex());
  Assert.assertTrue(intMetric.isSingleValue());
  Assert.assertEquals(intMetric.getMaxNumberOfMultiValues(), 0);
  Assert.assertEquals(intMetric.getTotalNumberOfEntries(), 100000);
  Assert.assertTrue(intMetric.isAutoGenerated());
  Assert.assertEquals(intMetric.getDefaultNullValueString(), "1");

  ColumnMetadata longMetric = firstMetadata.getColumnMetadataFor(NEW_LONG_METRIC_COLUMN_NAME);
  Assert.assertEquals(longMetric.getDataType(), FieldSpec.DataType.LONG);
  Assert.assertEquals(longMetric.getDefaultNullValueString(), "0");

  ColumnMetadata floatMetric = firstMetadata.getColumnMetadataFor(NEW_FLOAT_METRIC_COLUMN_NAME);
  Assert.assertEquals(floatMetric.getDataType(), FieldSpec.DataType.FLOAT);
  Assert.assertEquals(floatMetric.getDefaultNullValueString(), "0.0");

  ColumnMetadata doubleMetric = firstMetadata.getColumnMetadataFor(NEW_DOUBLE_METRIC_COLUMN_NAME);
  Assert.assertEquals(doubleMetric.getDataType(), FieldSpec.DataType.DOUBLE);
  Assert.assertEquals(doubleMetric.getDefaultNullValueString(), "0.0");

  ColumnMetadata booleanDimension = firstMetadata.getColumnMetadataFor(NEW_BOOLEAN_SV_DIMENSION_COLUMN_NAME);
  Assert.assertEquals(booleanDimension.getDataType(), FieldSpec.DataType.STRING);
  Assert.assertEquals(booleanDimension.getStringColumnMaxLength(), 5);
  Assert.assertEquals(booleanDimension.getFieldType(), FieldSpec.FieldType.DIMENSION);
  Assert.assertEquals(booleanDimension.getDefaultNullValueString(), "false");

  ColumnMetadata intDimension = firstMetadata.getColumnMetadataFor(NEW_INT_SV_DIMENSION_COLUMN_NAME);
  Assert.assertEquals(intDimension.getDataType(), FieldSpec.DataType.INT);
  Assert.assertEquals(intDimension.getDefaultNullValueString(), String.valueOf(Integer.MIN_VALUE));

  ColumnMetadata stringMvDimension = firstMetadata.getColumnMetadataFor(NEW_STRING_MV_DIMENSION_COLUMN_NAME);
  Assert.assertEquals(stringMvDimension.getDataType(), FieldSpec.DataType.STRING);
  Assert.assertEquals(stringMvDimension.getStringColumnMaxLength(), 4);
  Assert.assertFalse(stringMvDimension.isSorted());
  Assert.assertFalse(stringMvDimension.isSingleValue());
  Assert.assertEquals(stringMvDimension.getMaxNumberOfMultiValues(), 1);
  Assert.assertEquals(stringMvDimension.getTotalNumberOfEntries(), 100000);
  Assert.assertEquals(stringMvDimension.getDefaultNullValueString(), "null");

  // Each new column must have both a dictionary and a forward index.
  try (SegmentDirectory segmentDirectory =
          SegmentDirectory.createFromLocalFS(segmentDirectoryFile, firstMetadata, ReadMode.mmap);
      SegmentDirectory.Reader reader = segmentDirectory.createReader()) {
    final String[] newColumns = {
        NEW_INT_METRIC_COLUMN_NAME,
        NEW_LONG_METRIC_COLUMN_NAME,
        NEW_FLOAT_METRIC_COLUMN_NAME,
        NEW_DOUBLE_METRIC_COLUMN_NAME,
        NEW_BOOLEAN_SV_DIMENSION_COLUMN_NAME,
        NEW_INT_SV_DIMENSION_COLUMN_NAME,
        NEW_STRING_MV_DIMENSION_COLUMN_NAME
    };
    for (String newColumn : newColumns) {
      Assert.assertTrue(reader.hasIndexFor(newColumn, ColumnIndexType.DICTIONARY));
      Assert.assertTrue(reader.hasIndexFor(newColumn, ColumnIndexType.FORWARD_INDEX));
    }
  }

  // Second pass: the second schema changes the default of 'newIntMetric' to 2 and gives
  // 'newStringMVDimension' the default 'abcd' (same length as 'null').
  SegmentPreProcessor secondPass =
      new SegmentPreProcessor(segmentDirectoryFile, _indexLoadingConfigMetadata, _newColumnsSchema2);
  secondPass.process();
  SegmentMetadataImpl secondMetadata = new SegmentMetadataImpl(segmentDirectoryFile);

  ColumnMetadata updatedIntMetric = secondMetadata.getColumnMetadataFor(NEW_INT_METRIC_COLUMN_NAME);
  Assert.assertEquals(updatedIntMetric.getDefaultNullValueString(), "2");
  ColumnMetadata updatedStringMvDimension =
      secondMetadata.getColumnMetadataFor(NEW_STRING_MV_DIMENSION_COLUMN_NAME);
  Assert.assertEquals(updatedStringMvDimension.getDefaultNullValueString(), "abcd");
}
Aggregations