Use of com.linkedin.pinot.core.segment.index.ColumnMetadata in the project pinot by linkedin.
Class BitmapPerformanceBenchmark, method iterationSpeed.
/**
 * Times a full iteration over the inverted-index bitmap of one dictionary value of a column and
 * prints the number of matched entries together with the elapsed milliseconds.
 *
 * <p>The bitmap inspected is the one for the hard-coded dictionary value {@code "na.us"}.
 *
 * @param indexSegmentDir path of the segment directory that holds the {@code <column>.bitmap.inv} file
 * @param column name of the column whose inverted index is iterated
 * @throws IllegalArgumentException if the segment metadata has no entry for {@code column}
 * @throws Exception if the segment metadata, the inverted index or the dictionary cannot be loaded
 */
public static void iterationSpeed(String indexSegmentDir, String column) throws Exception {
  File indexSegment = new File(indexSegmentDir);
  SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(indexSegment);
  ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column);
  if (columnMetadata == null) {
    throw new IllegalArgumentException("No column metadata found for column: " + column);
  }
  int cardinality = columnMetadata.getCardinality();

  File bitMapIndexFile = new File(indexSegmentDir, column + ".bitmap.inv");
  PinotDataBuffer bitMapDataBuffer =
      PinotDataBuffer.fromFile(bitMapIndexFile, ReadMode.mmap, FileChannel.MapMode.READ_ONLY, "testing");
  // Release the buffer on every exit path, not only when the benchmark succeeds.
  try {
    BitmapInvertedIndexReader bitmapInvertedIndex = new BitmapInvertedIndexReader(bitMapDataBuffer, cardinality);

    SegmentDirectory segmentDirectory =
        SegmentDirectory.createFromLocalFS(indexSegment, segmentMetadata, ReadMode.mmap);
    SegmentDirectory.Reader segmentReader = segmentDirectory.createReader();
    ColumnIndexContainer container = ColumnIndexContainer.init(segmentReader, columnMetadata, null);
    ImmutableDictionaryReader dictionary = container.getDictionary();

    // NOTE(review): the result of indexOf is used as-is; confirm what it returns when "na.us" is
    // not present in the dictionary.
    int dictId = dictionary.indexOf("na.us");
    ImmutableRoaringBitmap immutable = bitmapInvertedIndex.getImmutable(dictId);
    Iterator<Integer> iterator = immutable.iterator();

    int count = 0;
    long start = System.currentTimeMillis();
    while (iterator.hasNext()) {
      iterator.next();
      count = count + 1;
    }
    long end = System.currentTimeMillis();
    System.out.println(" matched: " + count + " Time to iterate:" + (end - start));
  } finally {
    bitMapDataBuffer.close();
  }
}
Use of com.linkedin.pinot.core.segment.index.ColumnMetadata in the project pinot by linkedin.
Class BitmapPerformanceBenchmark, method benchmarkIntersetionAndUnion.
/**
 * Loads every {@code *.bitmap.inv} inverted index found in a segment directory and then repeatedly
 * picks random dimension/value combinations, printing the cardinality and size of the selected
 * bitmaps before passing them to {@code and(...)} and {@code or(...)}.
 *
 * <p>Only dimensions that have a loaded inverted index with a non-zero cardinality take part in
 * the benchmark, and the number of dimensions combined is capped by how many such dimensions exist.
 *
 * @param indexSegmentDir path of the segment directory to benchmark
 * @throws IOException if the files of {@code indexSegmentDir} cannot be listed
 * @throws ConfigurationException declared for callers; see the segment metadata loading
 * @throws Exception if the metadata, an inverted index or a dictionary cannot be loaded
 */
public static void benchmarkIntersetionAndUnion(String indexSegmentDir) throws ConfigurationException, IOException, Exception {
  final String invertedIndexSuffix = ".bitmap.inv";
  File indexDir = new File(indexSegmentDir);
  File[] listFiles = indexDir.listFiles();
  if (listFiles == null) {
    // listFiles() returns null when the path is not a directory or an I/O error occurs.
    throw new IOException("Unable to list files in segment directory: " + indexSegmentDir);
  }
  SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(indexDir);
  Map<String, BitmapInvertedIndexReader> bitMapIndexMap = new HashMap<String, BitmapInvertedIndexReader>();
  Map<String, Integer> cardinalityMap = new HashMap<String, Integer>();
  Map<String, ImmutableDictionaryReader> dictionaryMap = new HashMap<String, ImmutableDictionaryReader>();
  // The buffers back the inverted index readers used below, so they must stay open until the
  // benchmark is done; they are all closed in the finally block.
  List<PinotDataBuffer> openBuffers = new ArrayList<PinotDataBuffer>();
  try {
    // The segment directory and its reader do not depend on the column, so create them once.
    SegmentDirectory segmentDirectory = SegmentDirectory.createFromLocalFS(indexDir, segmentMetadata, ReadMode.mmap);
    SegmentDirectory.Reader segmentReader = segmentDirectory.createReader();
    for (File file : listFiles) {
      String fileName = file.getName();
      if (!fileName.endsWith(invertedIndexSuffix)) {
        continue;
      }
      // Strip the suffix literally; a regex replace would treat '.' as a wildcard and could also
      // remove matching text from the middle of the column name.
      String column = fileName.substring(0, fileName.length() - invertedIndexSuffix.length());
      ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column);
      int cardinality = columnMetadata.getCardinality();
      cardinalityMap.put(column, cardinality);
      System.out.println(column + "\t\t\t" + cardinality + " \t" + columnMetadata.getDataType());
      PinotDataBuffer bitmapDataBuffer = PinotDataBuffer.fromFile(file, ReadMode.mmap, FileChannel.MapMode.READ_ONLY, "testing");
      openBuffers.add(bitmapDataBuffer);
      BitmapInvertedIndexReader bitmapInvertedIndex = new BitmapInvertedIndexReader(bitmapDataBuffer, cardinality);
      ColumnIndexContainer container = ColumnIndexContainer.init(segmentReader, columnMetadata, null);
      ImmutableDictionaryReader dictionary = container.getDictionary();
      if (columnMetadata.getDataType() == DataType.INT) {
        System.out.println("BitmapPerformanceBenchmark.main()");
        assert dictionary instanceof IntDictionary;
      }
      dictionaryMap.put(column, dictionary);
      bitMapIndexMap.put(column, bitmapInvertedIndex);
    }

    // Work on a private copy: the list returned by the schema is not ours to shuffle, and only
    // dimensions with a usable inverted index can be benchmarked.
    List<String> dimensionNamesList = new ArrayList<String>();
    for (String dimensionName : segmentMetadata.getSchema().getDimensionNames()) {
      Integer dimensionCardinality = cardinalityMap.get(dimensionName);
      if (bitMapIndexMap.containsKey(dimensionName) && dimensionCardinality != null && dimensionCardinality > 0) {
        dimensionNamesList.add(dimensionName);
      }
    }

    final int numTests = 100;
    final int maxValuesPerDimension = 1;
    final int maxDimensionsInWhereClause = Math.min(3, dimensionNamesList.size());
    Random random = new Random();
    for (int numDimensions = 1; numDimensions <= maxDimensionsInWhereClause; numDimensions++) {
      for (int numValuesPerDimension = 1; numValuesPerDimension <= maxValuesPerDimension; numValuesPerDimension++) {
        for (int runCount = 0; runCount < numTests; runCount++) {
          Collections.shuffle(dimensionNamesList);
          List<ImmutableRoaringBitmap> bitMaps = new ArrayList<ImmutableRoaringBitmap>();
          List<String> columnNameValuePairs = new ArrayList<String>();
          for (int i = 0; i < numDimensions; i++) {
            String columnName = dimensionNamesList.get(i);
            InvertedIndexReader bitmapInvertedIndex = bitMapIndexMap.get(columnName);
            for (int j = 0; j < numValuesPerDimension; j++) {
              int dictId = random.nextInt(cardinalityMap.get(columnName));
              String dictValue = dictionaryMap.get(columnName).getStringValue(dictId);
              columnNameValuePairs.add(columnName + ":" + dictValue);
              ImmutableRoaringBitmap immutable = bitmapInvertedIndex.getImmutable(dictId);
              bitMaps.add(immutable);
            }
          }
          System.out.println("START**********************************");
          int[] cardinality = new int[bitMaps.size()];
          int[] sizes = new int[bitMaps.size()];
          for (int i = 0; i < bitMaps.size(); i++) {
            ImmutableRoaringBitmap immutableRoaringBitmap = bitMaps.get(i);
            cardinality[i] = immutableRoaringBitmap.getCardinality();
            sizes[i] = immutableRoaringBitmap.getSizeInBytes();
          }
          System.out.println("\t#bitmaps:" + bitMaps.size());
          System.out.println("\tinput values:" + columnNameValuePairs);
          System.out.println("\tinput cardinality:" + Arrays.toString(cardinality));
          System.out.println("\tinput sizes:" + Arrays.toString(sizes));
          and(bitMaps);
          or(bitMaps);
          System.out.println("END**********************************");
        }
      }
    }
  } finally {
    for (PinotDataBuffer openBuffer : openBuffers) {
      openBuffer.close();
    }
  }
}
Use of com.linkedin.pinot.core.segment.index.ColumnMetadata in the project pinot by linkedin.
Class BaseDefaultColumnHandler, method computeDefaultColumnActionMap.
/**
 * Determines which default-column action, if any, applies to each column by comparing the
 * schema against the segment metadata.
 *
 * <ul>
 *   <li>Schema column absent from the segment: ADD (dimension or metric; other field types are
 *       logged and skipped).</li>
 *   <li>Auto-generated segment column whose data type, single-value flag or default null value
 *       differs from the schema: UPDATE.</li>
 *   <li>Auto-generated segment column absent from the schema: REMOVE.</li>
 * </ul>
 *
 * @return map from column name to the action required for that column; columns that need no
 *         action are not present.
 * @throws RuntimeException if an auto-generated column's field type differs from the schema's.
 */
private Map<String, DefaultColumnAction> computeDefaultColumnActionMap() {
  Map<String, DefaultColumnAction> actions = new HashMap<>();
  Collection<String> schemaColumns = _schema.getColumnNames();

  // Pass 1: ADD and UPDATE actions, driven by the columns declared in the schema.
  for (String schemaColumn : schemaColumns) {
    FieldSpec schemaFieldSpec = _schema.getFieldSpecFor(schemaColumn);
    Preconditions.checkNotNull(schemaFieldSpec);
    FieldSpec.FieldType schemaFieldType = schemaFieldSpec.getFieldType();
    ColumnMetadata segmentColumnMetadata = _segmentMetadata.getColumnMetadataFor(schemaColumn);

    if (segmentColumnMetadata == null) {
      // The segment does not know this column yet.
      switch (schemaFieldType) {
        case DIMENSION:
          actions.put(schemaColumn, DefaultColumnAction.ADD_DIMENSION);
          break;
        case METRIC:
          actions.put(schemaColumn, DefaultColumnAction.ADD_METRIC);
          break;
        default:
          LOGGER.warn("Skip adding default column for column: {} with field type: {}", schemaColumn, schemaFieldType);
          break;
      }
      continue;
    }

    // Columns that were not auto-generated are never touched.
    if (!segmentColumnMetadata.isAutoGenerated()) {
      continue;
    }

    FieldSpec.FieldType segmentFieldType = segmentColumnMetadata.getFieldType();
    if (segmentFieldType != schemaFieldType) {
      throw new RuntimeException("Field type: " + segmentFieldType + " for auto-generated column: " + schemaColumn + " does not match field type: " + schemaFieldType + " in schema, throw exception to drop and re-download the segment.");
    }

    boolean dataTypeDiffers = segmentColumnMetadata.getDataType() != schemaFieldSpec.getDataType();
    boolean singleValueDiffers = segmentColumnMetadata.isSingleValue() != schemaFieldSpec.isSingleValueField();
    String segmentDefaultValue = segmentColumnMetadata.getDefaultNullValueString();
    String schemaDefaultValue = schemaFieldSpec.getDefaultNullValue().toString();
    boolean upToDate = !dataTypeDiffers && !singleValueDiffers && schemaDefaultValue.equals(segmentDefaultValue);
    if (upToDate) {
      continue;
    }

    if (segmentFieldType == FieldSpec.FieldType.DIMENSION) {
      actions.put(schemaColumn, DefaultColumnAction.UPDATE_DIMENSION);
    } else {
      Preconditions.checkState(segmentFieldType == FieldSpec.FieldType.METRIC);
      actions.put(schemaColumn, DefaultColumnAction.UPDATE_METRIC);
    }
  }

  // Pass 2: REMOVE actions, driven by the columns present in the segment.
  for (String segmentColumn : _segmentMetadata.getAllColumns()) {
    if (schemaColumns.contains(segmentColumn)) {
      continue;
    }
    ColumnMetadata segmentColumnMetadata = _segmentMetadata.getColumnMetadataFor(segmentColumn);
    // Only auto-generated columns are eligible for removal.
    if (!segmentColumnMetadata.isAutoGenerated()) {
      continue;
    }
    FieldSpec.FieldType segmentFieldType = segmentColumnMetadata.getFieldType();
    if (segmentFieldType == FieldSpec.FieldType.DIMENSION) {
      actions.put(segmentColumn, DefaultColumnAction.REMOVE_DIMENSION);
    } else {
      Preconditions.checkState(segmentFieldType == FieldSpec.FieldType.METRIC);
      actions.put(segmentColumn, DefaultColumnAction.REMOVE_METRIC);
    }
  }

  return actions;
}
Use of com.linkedin.pinot.core.segment.index.ColumnMetadata in the project pinot by linkedin.
Class SegmentFormatConverterV1ToV2, method convert.
/**
 * Converts the forward indexes of a segment from the v1 fixed-bit layout to the v2 layout and
 * then marks the segment metadata as {@code SegmentVersion.v2}.
 *
 * <p>Sorted columns are skipped. For every other column the v1 forward index is read row by row,
 * rewritten through the matching v2 writer into a temporary {@code .tmp} file, and that file then
 * replaces the column's forward index in the segment. Finally the metadata file is copied to
 * {@code <metadata>.orig} and rewritten with the updated segment version.
 *
 * @param indexSegmentDir directory of the segment to convert in place
 * @throws IllegalStateException if a converted forward index is too large to be addressed by an int
 * @throws Exception if reading or writing any index or the metadata file fails
 */
@Override
public void convert(File indexSegmentDir) throws Exception {
  SegmentMetadataImpl segmentMetadataImpl = new SegmentMetadataImpl(indexSegmentDir);
  SegmentDirectory segmentDirectory = SegmentDirectory.createFromLocalFS(indexSegmentDir, segmentMetadataImpl, ReadMode.mmap);
  Set<String> columns = segmentMetadataImpl.getAllColumns();
  SegmentDirectory.Writer segmentWriter = segmentDirectory.createWriter();
  final int totalDocs = segmentMetadataImpl.getTotalDocs();
  // since we use dictionary to encode values, we wont have any negative values in forward index
  final boolean signed = false;

  for (String column : columns) {
    ColumnMetadata columnMetadata = segmentMetadataImpl.getColumnMetadataFor(column);
    if (columnMetadata.isSorted()) {
      // no need to change sorted forward index
      continue;
    }
    PinotDataBuffer fwdIndexBuffer = segmentWriter.getIndexFor(column, ColumnIndexType.FORWARD_INDEX);
    File convertedFwdIndexFile;
    if (columnMetadata.isSingleValue()) {
      convertedFwdIndexFile = new File(indexSegmentDir, column + V1Constants.Indexes.UN_SORTED_SV_FWD_IDX_FILE_EXTENTION + ".tmp");
      SingleColumnSingleValueReader v1Reader = new com.linkedin.pinot.core.io.reader.impl.v1.FixedBitSingleValueReader(fwdIndexBuffer, totalDocs, columnMetadata.getBitsPerElement(), signed);
      try {
        SingleColumnSingleValueWriter v2Writer = new com.linkedin.pinot.core.io.writer.impl.v2.FixedBitSingleValueWriter(convertedFwdIndexFile, totalDocs, columnMetadata.getBitsPerElement());
        try {
          for (int row = 0; row < totalDocs; row++) {
            v2Writer.setInt(row, v1Reader.getInt(row));
          }
        } finally {
          v2Writer.close();
        }
      } finally {
        v1Reader.close();
      }
    } else {
      convertedFwdIndexFile = new File(indexSegmentDir, column + V1Constants.Indexes.UN_SORTED_MV_FWD_IDX_FILE_EXTENTION + ".tmp");
      SingleColumnMultiValueReader v1Reader = new com.linkedin.pinot.core.io.reader.impl.v1.FixedBitMultiValueReader(fwdIndexBuffer, totalDocs, columnMetadata.getTotalNumberOfEntries(), columnMetadata.getBitsPerElement(), signed);
      try {
        SingleColumnMultiValueWriter v2Writer = new com.linkedin.pinot.core.io.writer.impl.v2.FixedBitMultiValueWriter(convertedFwdIndexFile, totalDocs, columnMetadata.getTotalNumberOfEntries(), columnMetadata.getBitsPerElement());
        try {
          // Scratch array reused for every row; each row is trimmed to its real length before
          // being handed to the writer.
          int[] values = new int[columnMetadata.getMaxNumberOfMultiValues()];
          for (int row = 0; row < totalDocs; row++) {
            int length = v1Reader.getIntArray(row, values);
            int[] copy = new int[length];
            System.arraycopy(values, 0, copy, 0, length);
            v2Writer.setIntArray(row, copy);
          }
        } finally {
          v2Writer.close();
        }
      } finally {
        v1Reader.close();
      }
    }

    segmentWriter.removeIndex(column, ColumnIndexType.FORWARD_INDEX);
    // FIXME (carried over from the original code): the index size is passed as an int. Fail
    // loudly instead of silently truncating the size when the converted file exceeds that range.
    long convertedLength = convertedFwdIndexFile.length();
    if (convertedLength > Integer.MAX_VALUE) {
      throw new IllegalStateException("Converted forward index for column: " + column + " is too large: " + convertedLength + " bytes");
    }
    PinotDataBuffer newIndexBuffer = segmentWriter.newIndexFor(column, ColumnIndexType.FORWARD_INDEX, (int) convertedLength);
    newIndexBuffer.readFrom(convertedFwdIndexFile);
    // Best-effort cleanup of the temporary file, as in the original code.
    convertedFwdIndexFile.delete();
  }

  File metadataFile = new File(indexSegmentDir, V1Constants.MetadataKeys.METADATA_FILE_NAME);
  File metadataFileCopy = new File(indexSegmentDir, V1Constants.MetadataKeys.METADATA_FILE_NAME + ".orig");
  // Keep a copy of the original metadata; the streams are closed even if the copy fails.
  try (BufferedInputStream metadataIn = new BufferedInputStream(new FileInputStream(metadataFile));
      BufferedOutputStream metadataOut = new BufferedOutputStream(new FileOutputStream(metadataFileCopy))) {
    IOUtils.copy(metadataIn, metadataOut);
  }
  final PropertiesConfiguration properties = new PropertiesConfiguration(metadataFileCopy);
  // update the segment version
  properties.setProperty(V1Constants.MetadataKeys.Segment.SEGMENT_VERSION, SegmentVersion.v2.toString());
  metadataFile.delete();
  properties.save(metadataFile);
}
Use of com.linkedin.pinot.core.segment.index.ColumnMetadata in the project pinot by linkedin.
Class ColumnDataSourceImpl, method getNextBlock.
/**
 * Creates the block for the given block id, choosing the block implementation from the column
 * metadata held by the index container:
 * multi-value columns get a {@code MultiValueBlock}, single-value columns that are sorted and
 * have a dictionary get a {@code SortedSingleValueBlock}, and every other single-value column
 * gets an {@code UnSortedSingleValueBlock}.
 *
 * @param blockId id passed through to the created block
 * @return the newly created block
 */
@Override
public Block getNextBlock(BlockId blockId) {
  ColumnMetadata metadata = indexContainer.getColumnMetadata();

  if (!metadata.isSingleValue()) {
    SingleColumnMultiValueReader multiValueReader = (SingleColumnMultiValueReader) indexContainer.getForwardIndex();
    return new MultiValueBlock(blockId, multiValueReader, indexContainer.getDictionary(), metadata);
  }

  // TODO: Support sorted index without dictionary.
  boolean sortedWithDictionary = metadata.hasDictionary() && metadata.isSorted();
  if (sortedWithDictionary) {
    SortedForwardIndexReader sortedReader = (SortedForwardIndexReader) indexContainer.getForwardIndex();
    return new SortedSingleValueBlock(blockId, sortedReader, indexContainer.getDictionary(), metadata);
  }

  SingleColumnSingleValueReader singleValueReader = (SingleColumnSingleValueReader) indexContainer.getForwardIndex();
  return new UnSortedSingleValueBlock(blockId, singleValueReader, indexContainer.getDictionary(), metadata);
}
Aggregations