Search in sources :

Example 6 with ColumnMetadata

use of com.linkedin.pinot.core.segment.index.ColumnMetadata in project pinot by linkedin.

the class BitmapPerformanceBenchmark method iterationSpeed.

/**
 * Times one full iteration over the inverted-index bitmap of the dictionary value {@code "na.us"} for a
 * single column, then prints the number of matching entries and the elapsed wall-clock milliseconds.
 *
 * @param indexSegmentDir path of the segment directory; the bitmap index is read from
 *                        {@code <indexSegmentDir>/<column>.bitmap.inv}
 * @param column name of the column whose bitmap inverted index is iterated
 * @throws Exception if the segment metadata, the bitmap index file or the dictionary cannot be loaded
 */
public static void iterationSpeed(String indexSegmentDir, String column) throws Exception {
    File indexSegment = new File(indexSegmentDir);
    SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(indexSegment);
    ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column);
    int cardinality = columnMetadata.getCardinality();
    File bitMapIndexFile = new File(indexSegmentDir, column + ".bitmap.inv");
    PinotDataBuffer bitMapDataBuffer = PinotDataBuffer.fromFile(bitMapIndexFile, ReadMode.mmap, FileChannel.MapMode.READ_ONLY, "testing");
    try {
        BitmapInvertedIndexReader bitmapInvertedIndex = new BitmapInvertedIndexReader(bitMapDataBuffer, cardinality);
        SegmentDirectory segmentDirectory = SegmentDirectory.createFromLocalFS(indexSegment, segmentMetadata, ReadMode.mmap);
        SegmentDirectory.Reader segmentReader = segmentDirectory.createReader();
        ColumnIndexContainer container = ColumnIndexContainer.init(segmentReader, columnMetadata, null);
        ImmutableDictionaryReader dictionary = container.getDictionary();
        // The benchmark is hard-wired to the dictionary value "na.us".
        int dictId = dictionary.indexOf("na.us");
        ImmutableRoaringBitmap immutable = bitmapInvertedIndex.getImmutable(dictId);
        Iterator<Integer> iterator = immutable.iterator();
        int count = 0;
        long start = System.currentTimeMillis();
        while (iterator.hasNext()) {
            iterator.next();
            count++;
        }
        long end = System.currentTimeMillis();
        System.out.println(" matched: " + count + " Time to iterate:" + (end - start));
    } finally {
        // Release the mapped buffer even when loading or iterating fails.
        bitMapDataBuffer.close();
    }
}
Also used : ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) HashMap(java.util.HashMap) ImmutableDictionaryReader(com.linkedin.pinot.core.segment.index.readers.ImmutableDictionaryReader) BitmapInvertedIndexReader(com.linkedin.pinot.core.segment.index.readers.BitmapInvertedIndexReader) SegmentDirectory(com.linkedin.pinot.core.segment.store.SegmentDirectory) ColumnIndexContainer(com.linkedin.pinot.core.segment.index.column.ColumnIndexContainer) PinotDataBuffer(com.linkedin.pinot.core.segment.memory.PinotDataBuffer) ImmutableRoaringBitmap(org.roaringbitmap.buffer.ImmutableRoaringBitmap) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) File(java.io.File)

Example 7 with ColumnMetadata

use of com.linkedin.pinot.core.segment.index.ColumnMetadata in project pinot by linkedin.

the class BitmapPerformanceBenchmark method benchmarkIntersetionAndUnion.

/**
 * Loads every {@code *.bitmap.inv} inverted index found in a segment directory and then repeatedly picks
 * random dimension/value combinations, printing the cardinality and size of the selected bitmaps before
 * handing them to {@code and(...)} and {@code or(...)}.
 *
 * <p>The data buffers backing the inverted indexes stay open until the benchmark loop has finished, because
 * the readers stored in the lookup map are still queried during that loop.
 *
 * @param indexSegmentDir path of the segment directory to benchmark
 * @throws IOException if the files of the segment directory cannot be listed
 * @throws ConfigurationException declared by the original signature; not thrown directly by this method
 * @throws Exception if loading the segment metadata, an index or a dictionary fails
 */
public static void benchmarkIntersetionAndUnion(String indexSegmentDir) throws ConfigurationException, IOException, Exception {
    final String bitmapFileSuffix = ".bitmap.inv";
    File indexDir = new File(indexSegmentDir);
    File[] listFiles = indexDir.listFiles();
    if (listFiles == null) {
        // File.listFiles() returns null when the path is not a directory or an I/O error occurs.
        throw new IOException("Unable to list files under segment directory: " + indexSegmentDir);
    }
    SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(indexDir);
    Map<String, BitmapInvertedIndexReader> bitMapIndexMap = new HashMap<String, BitmapInvertedIndexReader>();
    Map<String, Integer> cardinalityMap = new HashMap<String, Integer>();
    Map<String, ImmutableDictionaryReader> dictionaryMap = new HashMap<String, ImmutableDictionaryReader>();
    // Buffers are closed only after the benchmark, since the readers built on them are used until then.
    List<PinotDataBuffer> bitmapDataBuffers = new ArrayList<PinotDataBuffer>();
    try {
        for (File file : listFiles) {
            String fileName = file.getName();
            if (!fileName.endsWith(bitmapFileSuffix)) {
                continue;
            }
            // Strip the literal suffix; a regex replace would treat '.' as "any character".
            String column = fileName.substring(0, fileName.length() - bitmapFileSuffix.length());
            ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column);
            int cardinality = columnMetadata.getCardinality();
            cardinalityMap.put(column, cardinality);
            System.out.println(column + "\t\t\t" + cardinality + "  \t" + columnMetadata.getDataType());
            PinotDataBuffer bitmapDataBuffer = PinotDataBuffer.fromFile(file, ReadMode.mmap, FileChannel.MapMode.READ_ONLY, "testing");
            bitmapDataBuffers.add(bitmapDataBuffer);
            BitmapInvertedIndexReader bitmapInvertedIndex = new BitmapInvertedIndexReader(bitmapDataBuffer, cardinality);
            SegmentDirectory segmentDirectory = SegmentDirectory.createFromLocalFS(indexDir, segmentMetadata, ReadMode.mmap);
            SegmentDirectory.Reader segmentReader = segmentDirectory.createReader();
            ColumnIndexContainer container = ColumnIndexContainer.init(segmentReader, columnMetadata, null);
            ImmutableDictionaryReader dictionary = container.getDictionary();
            if (columnMetadata.getDataType() == DataType.INT) {
                System.out.println("BitmapPerformanceBenchmark.main()");
                assert dictionary instanceof IntDictionary;
            }
            dictionaryMap.put(column, dictionary);
            bitMapIndexMap.put(column, bitmapInvertedIndex);
        }
        // Work on a private copy so the list returned by the schema is not reordered, and keep only the
        // dimensions for which a bitmap inverted index was actually loaded above.
        List<String> dimensionNamesList = new ArrayList<String>(segmentMetadata.getSchema().getDimensionNames());
        dimensionNamesList.retainAll(bitMapIndexMap.keySet());
        final int numTests = 100;
        final int maxValuesPerDimension = 1;
        // Never ask for more dimensions than are available.
        final int maxDimensionsInWhereClause = Math.min(3, dimensionNamesList.size());
        Random random = new Random();
        for (int numDimensions = 1; numDimensions <= maxDimensionsInWhereClause; numDimensions++) {
            for (int numValuesPerDimension = 1; numValuesPerDimension <= maxValuesPerDimension; numValuesPerDimension++) {
                for (int runCount = 0; runCount < numTests; runCount++) {
                    // A fresh shuffle per run selects a random subset of dimensions.
                    Collections.shuffle(dimensionNamesList);
                    List<ImmutableRoaringBitmap> bitMaps = new ArrayList<ImmutableRoaringBitmap>();
                    List<String> columnNameValuePairs = new ArrayList<String>();
                    for (int i = 0; i < numDimensions; i++) {
                        String columnName = dimensionNamesList.get(i);
                        InvertedIndexReader bitmapInvertedIndex = bitMapIndexMap.get(columnName);
                        for (int j = 0; j < numValuesPerDimension; j++) {
                            int dictId = random.nextInt(cardinalityMap.get(columnName));
                            String dictValue = dictionaryMap.get(columnName).getStringValue(dictId);
                            columnNameValuePairs.add(columnName + ":" + dictValue);
                            bitMaps.add(bitmapInvertedIndex.getImmutable(dictId));
                        }
                    }
                    System.out.println("START**********************************");
                    int[] cardinality = new int[bitMaps.size()];
                    int[] sizes = new int[bitMaps.size()];
                    for (int i = 0; i < bitMaps.size(); i++) {
                        ImmutableRoaringBitmap immutableRoaringBitmap = bitMaps.get(i);
                        cardinality[i] = immutableRoaringBitmap.getCardinality();
                        sizes[i] = immutableRoaringBitmap.getSizeInBytes();
                    }
                    System.out.println("\t#bitmaps:" + bitMaps.size());
                    System.out.println("\tinput values:" + columnNameValuePairs);
                    System.out.println("\tinput cardinality:" + Arrays.toString(cardinality));
                    System.out.println("\tinput sizes:" + Arrays.toString(sizes));
                    and(bitMaps);
                    or(bitMaps);
                    System.out.println("END**********************************");
                }
            }
        }
    } finally {
        for (PinotDataBuffer bitmapDataBuffer : bitmapDataBuffers) {
            bitmapDataBuffer.close();
        }
    }
}
Also used : ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) HashMap(java.util.HashMap) ImmutableDictionaryReader(com.linkedin.pinot.core.segment.index.readers.ImmutableDictionaryReader) ArrayList(java.util.ArrayList) SegmentDirectory(com.linkedin.pinot.core.segment.store.SegmentDirectory) IntDictionary(com.linkedin.pinot.core.segment.index.readers.IntDictionary) Random(java.util.Random) ImmutableRoaringBitmap(org.roaringbitmap.buffer.ImmutableRoaringBitmap) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) BitmapInvertedIndexReader(com.linkedin.pinot.core.segment.index.readers.BitmapInvertedIndexReader) ColumnIndexContainer(com.linkedin.pinot.core.segment.index.column.ColumnIndexContainer) PinotDataBuffer(com.linkedin.pinot.core.segment.memory.PinotDataBuffer) BitmapInvertedIndexReader(com.linkedin.pinot.core.segment.index.readers.BitmapInvertedIndexReader) InvertedIndexReader(com.linkedin.pinot.core.segment.index.readers.InvertedIndexReader) File(java.io.File)

Example 8 with ColumnMetadata

use of com.linkedin.pinot.core.segment.index.ColumnMetadata in project pinot by linkedin.

the class BaseDefaultColumnHandler method computeDefaultColumnActionMap.

/**
 * Works out which default-column action, if any, each column requires by comparing the schema against
 * the segment metadata.
 *
 * <p>Schema columns missing from the segment yield an ADD action, auto-generated segment columns whose
 * data type, single-value flag or default null value differ from the schema yield an UPDATE action, and
 * auto-generated segment columns that are absent from the schema yield a REMOVE action.
 *
 * @return map from column name to the action required for that column.
 * @throws RuntimeException if an auto-generated column's field type differs from the schema's field type.
 */
private Map<String, DefaultColumnAction> computeDefaultColumnActionMap() {
    Map<String, DefaultColumnAction> actions = new HashMap<>();
    Collection<String> schemaColumns = _schema.getColumnNames();

    // First pass: ADD and UPDATE actions, driven by the schema.
    for (String column : schemaColumns) {
        FieldSpec schemaSpec = _schema.getFieldSpecFor(column);
        Preconditions.checkNotNull(schemaSpec);
        FieldSpec.FieldType schemaFieldType = schemaSpec.getFieldType();
        ColumnMetadata metadata = _segmentMetadata.getColumnMetadataFor(column);

        if (metadata == null) {
            // Column is in the schema but not in the segment: add it when the field type supports it.
            switch (schemaFieldType) {
                case DIMENSION:
                    actions.put(column, DefaultColumnAction.ADD_DIMENSION);
                    break;
                case METRIC:
                    actions.put(column, DefaultColumnAction.ADD_METRIC);
                    break;
                default:
                    LOGGER.warn("Skip adding default column for column: {} with field type: {}", column, schemaFieldType);
                    break;
            }
            continue;
        }

        // Columns that were not auto-generated are left untouched.
        if (!metadata.isAutoGenerated()) {
            continue;
        }

        // A field type mismatch cannot be repaired in place.
        FieldSpec.FieldType metadataFieldType = metadata.getFieldType();
        if (metadataFieldType != schemaFieldType) {
            throw new RuntimeException("Field type: " + metadataFieldType + " for auto-generated column: " + column
                + " does not match field type: " + schemaFieldType
                + " in schema, throw exception to drop and re-download the segment.");
        }

        FieldSpec.DataType metadataDataType = metadata.getDataType();
        FieldSpec.DataType schemaDataType = schemaSpec.getDataType();
        boolean metadataSingleValue = metadata.isSingleValue();
        boolean schemaSingleValue = schemaSpec.isSingleValueField();
        String metadataDefault = metadata.getDefaultNullValueString();
        String schemaDefault = schemaSpec.getDefaultNullValue().toString();

        boolean upToDate = metadataDataType == schemaDataType
            && metadataSingleValue == schemaSingleValue
            && schemaDefault.equals(metadataDefault);
        if (upToDate) {
            continue;
        }

        if (metadataFieldType == FieldSpec.FieldType.DIMENSION) {
            actions.put(column, DefaultColumnAction.UPDATE_DIMENSION);
        } else {
            Preconditions.checkState(metadataFieldType == FieldSpec.FieldType.METRIC);
            actions.put(column, DefaultColumnAction.UPDATE_METRIC);
        }
    }

    // Second pass: REMOVE actions, driven by the segment metadata.
    for (String column : _segmentMetadata.getAllColumns()) {
        if (schemaColumns.contains(column)) {
            continue;
        }
        ColumnMetadata metadata = _segmentMetadata.getColumnMetadataFor(column);
        // Only auto-generated columns are ever removed.
        if (!metadata.isAutoGenerated()) {
            continue;
        }
        FieldSpec.FieldType metadataFieldType = metadata.getFieldType();
        if (metadataFieldType == FieldSpec.FieldType.DIMENSION) {
            actions.put(column, DefaultColumnAction.REMOVE_DIMENSION);
        } else {
            Preconditions.checkState(metadataFieldType == FieldSpec.FieldType.METRIC);
            actions.put(column, DefaultColumnAction.REMOVE_METRIC);
        }
    }
    return actions;
}
Also used : ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) HashMap(java.util.HashMap) FieldSpec(com.linkedin.pinot.common.data.FieldSpec)

Example 9 with ColumnMetadata

use of com.linkedin.pinot.core.segment.index.ColumnMetadata in project pinot by linkedin.

the class SegmentFormatConverterV1ToV2 method convert.

/**
 * Rewrites the forward index of every unsorted column of a segment using the v2 fixed-bit writers and then
 * marks the segment metadata as version {@code v2}.
 *
 * <p>Sorted columns are skipped. For each remaining column the v1 forward index is read row by row, written
 * to a temporary {@code .tmp} file in the segment directory, swapped in through the segment writer, and the
 * temporary file is deleted. Finally the metadata file is copied to a {@code .orig} sibling and rewritten
 * with the updated segment version.
 *
 * @param indexSegmentDir directory of the segment to convert in place
 * @throws IllegalStateException if a converted forward index is too large for an int-sized index buffer
 * @throws Exception if reading or writing any index or metadata file fails
 */
@Override
public void convert(File indexSegmentDir) throws Exception {
    SegmentMetadataImpl segmentMetadataImpl = new SegmentMetadataImpl(indexSegmentDir);
    SegmentDirectory segmentDirectory = SegmentDirectory.createFromLocalFS(indexSegmentDir, segmentMetadataImpl, ReadMode.mmap);
    Set<String> columns = segmentMetadataImpl.getAllColumns();
    SegmentDirectory.Writer segmentWriter = segmentDirectory.createWriter();
    final int totalDocs = segmentMetadataImpl.getTotalDocs();
    // since we use dictionary to encode values, we wont have any negative values in forward index
    final boolean signed = false;
    for (String column : columns) {
        ColumnMetadata columnMetadata = segmentMetadataImpl.getColumnMetadataFor(column);
        if (columnMetadata.isSorted()) {
            // no need to change sorted forward index
            continue;
        }
        PinotDataBuffer fwdIndexBuffer = segmentWriter.getIndexFor(column, ColumnIndexType.FORWARD_INDEX);
        File convertedFwdIndexFile;
        if (columnMetadata.isSingleValue()) {
            convertedFwdIndexFile = new File(indexSegmentDir, column + V1Constants.Indexes.UN_SORTED_SV_FWD_IDX_FILE_EXTENTION + ".tmp");
            SingleColumnSingleValueReader v1Reader = new com.linkedin.pinot.core.io.reader.impl.v1.FixedBitSingleValueReader(fwdIndexBuffer, totalDocs, columnMetadata.getBitsPerElement(), signed);
            SingleColumnSingleValueWriter v2Writer = new com.linkedin.pinot.core.io.writer.impl.v2.FixedBitSingleValueWriter(convertedFwdIndexFile, totalDocs, columnMetadata.getBitsPerElement());
            for (int row = 0; row < totalDocs; row++) {
                v2Writer.setInt(row, v1Reader.getInt(row));
            }
            v1Reader.close();
            v2Writer.close();
        } else {
            convertedFwdIndexFile = new File(indexSegmentDir, column + V1Constants.Indexes.UN_SORTED_MV_FWD_IDX_FILE_EXTENTION + ".tmp");
            SingleColumnMultiValueReader v1Reader = new com.linkedin.pinot.core.io.reader.impl.v1.FixedBitMultiValueReader(fwdIndexBuffer, totalDocs, columnMetadata.getTotalNumberOfEntries(), columnMetadata.getBitsPerElement(), signed);
            SingleColumnMultiValueWriter v2Writer = new com.linkedin.pinot.core.io.writer.impl.v2.FixedBitMultiValueWriter(convertedFwdIndexFile, totalDocs, columnMetadata.getTotalNumberOfEntries(), columnMetadata.getBitsPerElement());
            // Scratch array sized for the widest row; each row is copied out at its actual length.
            int[] values = new int[columnMetadata.getMaxNumberOfMultiValues()];
            for (int row = 0; row < totalDocs; row++) {
                int length = v1Reader.getIntArray(row, values);
                int[] copy = new int[length];
                System.arraycopy(values, 0, copy, 0, length);
                v2Writer.setIntArray(row, copy);
            }
            v1Reader.close();
            v2Writer.close();
        }
        // Swap the old forward index for the converted one, then drop the temporary file.
        segmentWriter.removeIndex(column, ColumnIndexType.FORWARD_INDEX);
        // FIXME (carried over from the original; the size is now range-checked instead of silently truncated)
        PinotDataBuffer newIndexBuffer = segmentWriter.newIndexFor(column, ColumnIndexType.FORWARD_INDEX, toIndexSize(convertedFwdIndexFile));
        newIndexBuffer.readFrom(convertedFwdIndexFile);
        convertedFwdIndexFile.delete();
    }
    File metadataFile = new File(indexSegmentDir, V1Constants.MetadataKeys.METADATA_FILE_NAME);
    File metadataFileCopy = new File(indexSegmentDir, V1Constants.MetadataKeys.METADATA_FILE_NAME + ".orig");
    // Both streams are closed even when the copy fails.
    try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(metadataFile));
        BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(metadataFileCopy))) {
        IOUtils.copy(bis, bos);
    }
    final PropertiesConfiguration properties = new PropertiesConfiguration(metadataFileCopy);
    // update the segment version
    properties.setProperty(V1Constants.MetadataKeys.Segment.SEGMENT_VERSION, SegmentVersion.v2.toString());
    metadataFile.delete();
    properties.save(metadataFile);
}

/** Returns the length of {@code indexFile} as an int, failing loudly instead of truncating on overflow. */
private static int toIndexSize(File indexFile) {
    long length = indexFile.length();
    if (length > Integer.MAX_VALUE) {
        throw new IllegalStateException("Converted forward index file: " + indexFile + " is too large (" + length + " bytes) for an int-sized index buffer");
    }
    return (int) length;
}
Also used : SingleColumnMultiValueWriter(com.linkedin.pinot.core.io.writer.SingleColumnMultiValueWriter) ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) SegmentDirectory(com.linkedin.pinot.core.segment.store.SegmentDirectory) PropertiesConfiguration(org.apache.commons.configuration.PropertiesConfiguration) SingleColumnSingleValueWriter(com.linkedin.pinot.core.io.writer.SingleColumnSingleValueWriter) BufferedInputStream(java.io.BufferedInputStream) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) SingleColumnMultiValueReader(com.linkedin.pinot.core.io.reader.SingleColumnMultiValueReader) BufferedOutputStream(java.io.BufferedOutputStream) SingleColumnSingleValueReader(com.linkedin.pinot.core.io.reader.SingleColumnSingleValueReader) FileInputStream(java.io.FileInputStream) PinotDataBuffer(com.linkedin.pinot.core.segment.memory.PinotDataBuffer) FileOutputStream(java.io.FileOutputStream) File(java.io.File)

Example 10 with ColumnMetadata

use of com.linkedin.pinot.core.segment.index.ColumnMetadata in project pinot by linkedin.

the class ColumnDataSourceImpl method getNextBlock.

/**
 * Builds the block for the given block id, choosing the block implementation from the column metadata:
 * multi-value columns get a {@code MultiValueBlock}, single-value columns that have a dictionary and are
 * sorted get a {@code SortedSingleValueBlock}, and every other single-value column gets an
 * {@code UnSortedSingleValueBlock}.
 *
 * @param blockId id passed through to the created block
 * @return the newly created block
 */
@Override
public Block getNextBlock(BlockId blockId) {
    ColumnMetadata metadata = indexContainer.getColumnMetadata();

    if (!metadata.isSingleValue()) {
        return new MultiValueBlock(blockId, (SingleColumnMultiValueReader) indexContainer.getForwardIndex(),
            indexContainer.getDictionary(), metadata);
    }

    // TODO: Support sorted index without dictionary.
    boolean sortedWithDictionary = metadata.hasDictionary() && metadata.isSorted();
    if (sortedWithDictionary) {
        return new SortedSingleValueBlock(blockId, (SortedForwardIndexReader) indexContainer.getForwardIndex(),
            indexContainer.getDictionary(), metadata);
    }

    return new UnSortedSingleValueBlock(blockId, (SingleColumnSingleValueReader) indexContainer.getForwardIndex(),
        indexContainer.getDictionary(), metadata);
}
Also used : SingleColumnSingleValueReader(com.linkedin.pinot.core.io.reader.SingleColumnSingleValueReader) UnSortedSingleValueBlock(com.linkedin.pinot.core.operator.blocks.UnSortedSingleValueBlock) ColumnMetadata(com.linkedin.pinot.core.segment.index.ColumnMetadata) SortedForwardIndexReader(com.linkedin.pinot.core.io.reader.impl.SortedForwardIndexReader) SortedSingleValueBlock(com.linkedin.pinot.core.operator.blocks.SortedSingleValueBlock) UnSortedSingleValueBlock(com.linkedin.pinot.core.operator.blocks.UnSortedSingleValueBlock) Block(com.linkedin.pinot.core.common.Block) MultiValueBlock(com.linkedin.pinot.core.operator.blocks.MultiValueBlock) SortedSingleValueBlock(com.linkedin.pinot.core.operator.blocks.SortedSingleValueBlock) UnSortedSingleValueBlock(com.linkedin.pinot.core.operator.blocks.UnSortedSingleValueBlock) MultiValueBlock(com.linkedin.pinot.core.operator.blocks.MultiValueBlock) SingleColumnMultiValueReader(com.linkedin.pinot.core.io.reader.SingleColumnMultiValueReader)

Aggregations

ColumnMetadata (com.linkedin.pinot.core.segment.index.ColumnMetadata): 16, SegmentMetadataImpl (com.linkedin.pinot.core.segment.index.SegmentMetadataImpl): 10, PinotDataBuffer (com.linkedin.pinot.core.segment.memory.PinotDataBuffer): 5, SegmentDirectory (com.linkedin.pinot.core.segment.store.SegmentDirectory): 5, File (java.io.File): 5, FieldSpec (com.linkedin.pinot.common.data.FieldSpec): 4, Test (org.testng.annotations.Test): 4, SingleColumnMultiValueReader (com.linkedin.pinot.core.io.reader.SingleColumnMultiValueReader): 3, SingleColumnSingleValueReader (com.linkedin.pinot.core.io.reader.SingleColumnSingleValueReader): 3, ImmutableDictionaryReader (com.linkedin.pinot.core.segment.index.readers.ImmutableDictionaryReader): 3, HashMap (java.util.HashMap): 3, FilterQueryTree (com.linkedin.pinot.common.utils.request.FilterQueryTree): 2, IndexSegmentImpl (com.linkedin.pinot.core.segment.index.IndexSegmentImpl): 2, ColumnIndexContainer (com.linkedin.pinot.core.segment.index.column.ColumnIndexContainer): 2, BitmapInvertedIndexReader (com.linkedin.pinot.core.segment.index.readers.BitmapInvertedIndexReader): 2, IntDictionary (com.linkedin.pinot.core.segment.index.readers.IntDictionary): 2, StringDictionary (com.linkedin.pinot.core.segment.index.readers.StringDictionary): 2, PropertiesConfiguration (org.apache.commons.configuration.PropertiesConfiguration): 2, ImmutableRoaringBitmap (org.roaringbitmap.buffer.ImmutableRoaringBitmap): 2, DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec): 1