Search in sources :

Example 11 with IndexRow

use of org.apache.carbondata.core.indexstore.row.IndexRow in project carbondata by apache.

The class BlockletIndex, method loadToUnsafe.

/**
 * Converts the blocklet-level metadata of one carbondata file footer into index rows and
 * stores them in the unsafe memory store. One row is written per blocklet; in parallel a
 * single task-level summary row is maintained that accumulates min/max across all blocklets.
 *
 * @param schema              per-blocklet index row schema (ordinal positions below must match it)
 * @param taskSummarySchema   schema of the task-level summary row
 * @param fileFooter          footer of the carbondata file being indexed
 * @param segmentProperties   segment properties used to project min/max onto cached columns
 * @param minMaxCacheColumns  columns for which min/max is cached (null/empty means all)
 * @param filePath            full path of the carbondata file
 * @param summaryRow          existing task summary row, or null to create one
 * @param blockMetaInfo       block location and size metadata
 * @param relativeBlockletId  id of the first blocklet of this file relative to the task
 * @return the (possibly newly created) task summary row
 */
private IndexRowImpl loadToUnsafe(CarbonRowSchema[] schema, CarbonRowSchema[] taskSummarySchema, DataFileFooter fileFooter, SegmentProperties segmentProperties, List<CarbonColumn> minMaxCacheColumns, String filePath, IndexRowImpl summaryRow, BlockMetaInfo blockMetaInfo, int relativeBlockletId) {
    List<BlockletInfo> blockletList = fileFooter.getBlockletList();
    // Add one row to maintain task level min max for segment pruning
    if (!blockletList.isEmpty() && summaryRow == null) {
        summaryRow = new IndexRowImpl(taskSummarySchema);
    }
    for (int index = 0; index < blockletList.size(); index++) {
        IndexRow row = new IndexRowImpl(schema);
        int ordinal = 0;
        // summary row ordinal 0 is reserved; min/max start at 1
        int taskMinMaxOrdinal = 1;
        BlockletInfo blockletInfo = blockletList.get(index);
        blockletInfo.setSorted(fileFooter.isSorted());
        BlockletMinMaxIndex minMaxIndex = blockletInfo.getBlockletIndex().getMinMaxIndex();
        // get min max values for columns to be cached
        byte[][] minValuesForColumnsToBeCached = BlockletIndexUtil.getMinMaxForColumnsToBeCached(segmentProperties, minMaxCacheColumns, minMaxIndex.getMinValues());
        byte[][] maxValuesForColumnsToBeCached = BlockletIndexUtil.getMinMaxForColumnsToBeCached(segmentProperties, minMaxCacheColumns, minMaxIndex.getMaxValues());
        boolean[] minMaxFlagValuesForColumnsToBeCached = BlockletIndexUtil.getMinMaxFlagValuesForColumnsToBeCached(segmentProperties, minMaxCacheColumns, fileFooter.getBlockletIndex().getMinMaxIndex().getIsMinMaxSet());
        row.setRow(addMinMax(schema[ordinal], minValuesForColumnsToBeCached), ordinal);
        // compute and set task level min values
        addTaskMinMaxValues(summaryRow, taskSummarySchema, taskMinMaxOrdinal, minValuesForColumnsToBeCached, TASK_MIN_VALUES_INDEX, true, fileFooter.getColumnInTable());
        ordinal++;
        taskMinMaxOrdinal++;
        row.setRow(addMinMax(schema[ordinal], maxValuesForColumnsToBeCached), ordinal);
        // compute and set task level max values
        addTaskMinMaxValues(summaryRow, taskSummarySchema, taskMinMaxOrdinal, maxValuesForColumnsToBeCached, TASK_MAX_VALUES_INDEX, false, fileFooter.getColumnInTable());
        ordinal++;
        // row count of this blocklet
        row.setInt(blockletInfo.getNumberOfRows(), ordinal++);
        // add file name (only the file name, not the full path, to keep the row small)
        byte[] filePathBytes = CarbonTablePath.getCarbonDataFileName(filePath).getBytes(CarbonCommonConstants.DEFAULT_CHARSET_CLASS);
        row.setByteArray(filePathBytes, ordinal++);
        // add version number
        row.setShort(fileFooter.getVersionId().number(), ordinal++);
        // add schema updated time
        row.setLong(fileFooter.getSchemaUpdatedTimeStamp(), ordinal++);
        byte[] serializedData;
        try {
            // Add block footer offset, it is used if we need to read footer of block
            row.setLong(fileFooter.getBlockInfo().getBlockOffset(), ordinal++);
            setLocations(blockMetaInfo.getLocationInfo(), row, ordinal++);
            // Store block size
            row.setLong(blockMetaInfo.getSize(), ordinal++);
            // add min max flag for all the dimension columns
            addMinMaxFlagValues(row, schema[ordinal], minMaxFlagValuesForColumnsToBeCached, ordinal);
            ordinal++;
            // add blocklet info (serialized so the executor can deserialize it later)
            ByteArrayOutputStream stream = new ByteArrayOutputStream();
            DataOutput dataOutput = new DataOutputStream(stream);
            blockletInfo.write(dataOutput);
            serializedData = stream.toByteArray();
            row.setByteArray(serializedData, ordinal++);
            // add pages
            row.setShort((short) blockletInfo.getNumberOfPages(), ordinal++);
            // for relative blocklet id i.e blocklet id that belongs to a particular carbondata file
            row.setShort((short) relativeBlockletId++, ordinal);
            memoryDMStore.addIndexRow(schema, row);
        } catch (Exception e) {
            // include the file path so the failing block is identifiable,
            // consistent with loadToUnsafeBlock's error handling
            throw new RuntimeException("Load to unsafe failed for block: " + filePath, e);
        }
    }
    return summaryRow;
}
Also used : DataOutput(java.io.DataOutput) IndexRow(org.apache.carbondata.core.indexstore.row.IndexRow) DataOutputStream(java.io.DataOutputStream) BlockletInfo(org.apache.carbondata.core.metadata.blocklet.BlockletInfo) IndexRowImpl(org.apache.carbondata.core.indexstore.row.IndexRowImpl) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException) BlockletMinMaxIndex(org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex)

Example 12 with IndexRow

use of org.apache.carbondata.core.indexstore.row.IndexRow in project carbondata by apache.

The class BlockIndex, method prune.

/**
 * Prunes the index rows held in the in-memory store against the given filter and returns
 * the surviving blocklets. With a null filter every entry is returned. When the explain
 * collector is enabled, total/hit blocklet counts are recorded for pruning statistics.
 *
 * @param filterExp         filter resolver tree; null means no filtering
 * @param filterExecutor    pre-built filter executor; rebuilt locally if the passed
 *                          segment properties do not match this index's properties
 * @param segmentProperties segment properties of the caller's segment
 * @return list of blocklets that survive min/max pruning (possibly empty, never null)
 */
private List<Blocklet> prune(FilterResolverIntf filterExp, FilterExecutor filterExecutor, SegmentProperties segmentProperties) {
    // nothing indexed -> nothing to prune
    if (memoryDMStore.getRowCount() == 0) {
        return new ArrayList<>();
    }
    List<Blocklet> blocklets = new ArrayList<>();
    CarbonRowSchema[] schema = getFileFooterEntrySchema();
    String filePath = getFilePath();
    int numEntries = memoryDMStore.getRowCount();
    int totalBlocklets = 0;
    // total count is only needed for explain statistics; avoid the work otherwise
    if (ExplainCollector.enabled()) {
        totalBlocklets = getTotalBlocklets();
    }
    int hitBlocklets = 0;
    if (filterExp == null) {
        // no filter: every indexed entry is a hit
        for (int i = 0; i < numEntries; i++) {
            IndexRow indexRow = memoryDMStore.getIndexRow(schema, i);
            blocklets.add(createBlocklet(indexRow, getFileNameWithFilePath(indexRow, filePath), getBlockletId(indexRow), false));
        }
        hitBlocklets = totalBlocklets;
    } else {
        // Remove B-tree jump logic as start and end key prepared is not
        // correct for old store scenarios
        int entryIndex = 0;
        // flag to be used for deciding whether use min/max in executor pruning for BlockletIndex
        boolean useMinMaxForPruning = useMinMaxForExecutorPruning(filterExp);
        // if the caller's segment properties don't match this index, the supplied executor
        // was built against the wrong schema; rebuild it from this index's own properties
        if (!validateSegmentProperties(segmentProperties)) {
            filterExecutor = FilterUtil.getFilterExecutorTree(filterExp, getSegmentProperties(), null, getMinMaxCacheColumns(), false);
        }
        // min and max for executor pruning
        while (entryIndex < numEntries) {
            IndexRow row = memoryDMStore.getIndexRow(schema, entryIndex);
            boolean[] minMaxFlag = getMinMaxFlag(row, BLOCK_MIN_MAX_FLAG);
            String fileName = getFileNameWithFilePath(row, filePath);
            short blockletId = getBlockletId(row);
            boolean isValid = addBlockBasedOnMinMaxValue(filterExecutor, getMinMaxValue(row, MAX_VALUES_INDEX), getMinMaxValue(row, MIN_VALUES_INDEX), minMaxFlag, fileName, blockletId);
            if (isValid) {
                blocklets.add(createBlocklet(row, fileName, blockletId, useMinMaxForPruning));
                if (ExplainCollector.enabled()) {
                    hitBlocklets += getBlockletNumOfEntry(entryIndex);
                }
            }
            entryIndex++;
        }
    }
    // publish pruning statistics when explain output was requested
    if (ExplainCollector.enabled()) {
        ExplainCollector.setShowPruningInfo(true);
        ExplainCollector.addTotalBlocklets(totalBlocklets);
        ExplainCollector.addTotalBlocks(getTotalBlocks());
        ExplainCollector.addDefaultIndexPruningHit(hitBlocklets);
    }
    return blocklets;
}
Also used : ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet) Blocklet(org.apache.carbondata.core.indexstore.Blocklet) IndexRow(org.apache.carbondata.core.indexstore.row.IndexRow) ArrayList(java.util.ArrayList) CarbonRowSchema(org.apache.carbondata.core.indexstore.schema.CarbonRowSchema)

Example 13 with IndexRow

use of org.apache.carbondata.core.indexstore.row.IndexRow in project carbondata by apache.

The class BlockIndex, method loadToUnsafeBlock.

/**
 * Loads block-level (not blocklet-level) information into the unsafe store. This path is
 * taken only for old stores where blocklet information is not available in the index file;
 * the blocklet information is then read lazily in the executor.
 *
 * @param schema              per-block index row schema (ordinal positions below must match it)
 * @param taskSummarySchema   schema of the task-level summary row
 * @param fileFooter          footer of the carbondata file being indexed
 * @param segmentProperties   segment properties used to project min/max onto cached columns
 * @param minMaxCacheColumns  columns for which min/max is cached (null/empty means all)
 * @param filePath            full path of the carbondata file
 * @param summaryRow          existing task summary row, or null to create one
 * @param blockMetaInfo       block location and size metadata
 * @param minValues           block-level min values per column
 * @param maxValues           block-level max values per column
 * @param minMaxFlag          whether min/max is actually set, per column
 * @return the (possibly newly created) task summary row
 */
protected IndexRowImpl loadToUnsafeBlock(CarbonRowSchema[] schema, CarbonRowSchema[] taskSummarySchema, DataFileFooter fileFooter, SegmentProperties segmentProperties, List<CarbonColumn> minMaxCacheColumns, String filePath, IndexRowImpl summaryRow, BlockMetaInfo blockMetaInfo, byte[][] minValues, byte[][] maxValues, boolean[] minMaxFlag) {
    // Add one row to maintain task level min max for segment pruning
    if (summaryRow == null) {
        summaryRow = new IndexRowImpl(taskSummarySchema);
    }
    IndexRow row = new IndexRowImpl(schema);
    int ordinal = 0;
    // summary row ordinal 0 is reserved; min/max start at 1
    int taskMinMaxOrdinal = 1;
    // get min max values for columns to be cached
    byte[][] minValuesForColumnsToBeCached = BlockletIndexUtil.getMinMaxForColumnsToBeCached(segmentProperties, minMaxCacheColumns, minValues);
    byte[][] maxValuesForColumnsToBeCached = BlockletIndexUtil.getMinMaxForColumnsToBeCached(segmentProperties, minMaxCacheColumns, maxValues);
    boolean[] minMaxFlagValuesForColumnsToBeCached = BlockletIndexUtil.getMinMaxFlagValuesForColumnsToBeCached(segmentProperties, minMaxCacheColumns, minMaxFlag);
    IndexRow indexRow = addMinMax(schema[ordinal], minValuesForColumnsToBeCached);
    row.setRow(indexRow, ordinal);
    // compute and set task level min values
    addTaskMinMaxValues(summaryRow, taskSummarySchema, taskMinMaxOrdinal, minValuesForColumnsToBeCached, TASK_MIN_VALUES_INDEX, true, fileFooter.getColumnInTable());
    ordinal++;
    taskMinMaxOrdinal++;
    row.setRow(addMinMax(schema[ordinal], maxValuesForColumnsToBeCached), ordinal);
    // compute and set task level max values
    addTaskMinMaxValues(summaryRow, taskSummarySchema, taskMinMaxOrdinal, maxValuesForColumnsToBeCached, TASK_MAX_VALUES_INDEX, false, fileFooter.getColumnInTable());
    ordinal++;
    // add total rows in one carbondata file
    row.setInt((int) fileFooter.getNumberOfRows(), ordinal++);
    // add file name (only the file name, not the full path, to keep the row small)
    byte[] filePathBytes = CarbonTablePath.getCarbonDataFileName(filePath).getBytes(CarbonCommonConstants.DEFAULT_CHARSET_CLASS);
    row.setByteArray(filePathBytes, ordinal++);
    // add version number
    row.setShort(fileFooter.getVersionId().number(), ordinal++);
    // add schema updated time
    row.setLong(fileFooter.getSchemaUpdatedTimeStamp(), ordinal++);
    // add block offset
    row.setLong(fileFooter.getBlockInfo().getBlockOffset(), ordinal++);
    try {
        setLocations(blockMetaInfo.getLocationInfo(), row, ordinal++);
        // store block size
        row.setLong(blockMetaInfo.getSize(), ordinal++);
        // add min max flag for all the dimension columns
        addMinMaxFlagValues(row, schema[ordinal], minMaxFlagValuesForColumnsToBeCached, ordinal);
        memoryDMStore.addIndexRow(schema, row);
    } catch (Exception e) {
        String message = "Load to unsafe failed for block: " + filePath;
        LOGGER.error(message, e);
        throw new RuntimeException(message, e);
    }
    return summaryRow;
}
Also used : IndexRow(org.apache.carbondata.core.indexstore.row.IndexRow) IndexRowImpl(org.apache.carbondata.core.indexstore.row.IndexRowImpl) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IOException(java.io.IOException)

Example 14 with IndexRow

use of org.apache.carbondata.core.indexstore.row.IndexRow in project carbondata by apache.

The class UnsafeMemoryDMStore, method addIndexRow.

/**
 * Add the index row to unsafe.
 * Below format is used to store data in memory block
 * WRITE:
 * <FD><FD><FD><VO><VO><VO><LO><VD><VD><VD>
 * FD: Fixed Column data
 * VO: Variable column data offset
 * VD: Variable column data
 * LO: Last Offset
 *
 * Read:
 * FD: Read directly based of byte position added in CarbonRowSchema
 *
 * VD: Read based on below logic
 * if not last variable column schema
 * X = read actual variable column offset based on byte position added in CarbonRowSchema
 * Y = read next variable column offset (next 4 bytes)
 * get the length
 * len  = (Y-X)
 * read data from offset X of size len
 *
 * if last variable column
 * X = read actual variable column offset based on byte position added in CarbonRowSchema
 * Y = read last offset (next 4 bytes)
 * get the length
 * len  = (Y-X)
 * read data from offset X of size len
 *
 * @param indexRow row to serialize into this store's unsafe memory block
 */
public void addIndexRow(CarbonRowSchema[] schema, IndexRow indexRow) {
    // First calculate the required memory to keep the row in unsafe
    int rowSize = indexRow.getTotalSizeInBytes();
    // Check whether allocated memory is sufficient or not.
    ensureSize(rowSize);
    // pointer marks where this row starts within the memory block
    int pointer = runningLength;
    int bytePosition = 0;
    // find the largest byte position across the (possibly nested) schema;
    // the last-offset slot is written just after it
    for (CarbonRowSchema carbonRowSchema : schema) {
        if (carbonRowSchema.getSchemaType() == CarbonRowSchema.IndexSchemaType.STRUCT) {
            CarbonRowSchema[] childSchemas = ((CarbonRowSchema.StructCarbonRowSchema) carbonRowSchema).getChildSchemas();
            for (int j = 0; j < childSchemas.length; j++) {
                if (childSchemas[j].getBytePosition() > bytePosition) {
                    bytePosition = childSchemas[j].getBytePosition();
                }
            }
        } else {
            if (carbonRowSchema.getBytePosition() > bytePosition) {
                bytePosition = carbonRowSchema.getBytePosition();
            }
        }
    }
    // byte position of Last offset
    bytePosition += CarbonCommonConstants.INT_SIZE_IN_BYTE;
    // start byte position of variable length data
    int varColPosition = bytePosition + CarbonCommonConstants.INT_SIZE_IN_BYTE;
    // current position refers to current byte position in memory block
    int currentPosition;
    // write each column; struct columns write their children individually
    for (int i = 0; i < schema.length; i++) {
        switch(schema[i].getSchemaType()) {
            case STRUCT:
                CarbonRowSchema[] childSchemas = ((CarbonRowSchema.StructCarbonRowSchema) schema[i]).getChildSchemas();
                IndexRow row = indexRow.getRow(i);
                for (int j = 0; j < childSchemas.length; j++) {
                    currentPosition = addToUnsafe(childSchemas[j], row, j, pointer, varColPosition);
                    // addToUnsafe returns > 0 only for variable-length columns; advance the
                    // variable-data cursor in that case
                    if (currentPosition > 0) {
                        varColPosition = currentPosition;
                    }
                }
                break;
            default:
                currentPosition = addToUnsafe(schema[i], indexRow, i, pointer, varColPosition);
                if (currentPosition > 0) {
                    varColPosition = currentPosition;
                }
                break;
        }
    }
    // writing the last offset
    getUnsafe().putInt(memoryBlock.getBaseObject(), memoryBlock.getBaseOffset() + pointer + bytePosition, varColPosition);
    // after adding last offset increment the length by 4 bytes as last position
    // written as INT
    runningLength += CarbonCommonConstants.INT_SIZE_IN_BYTE;
    // remember the start pointer of this row for later reads
    pointers[rowCount++] = pointer;
}
Also used : UnsafeIndexRow(org.apache.carbondata.core.indexstore.row.UnsafeIndexRow) IndexRow(org.apache.carbondata.core.indexstore.row.IndexRow) CarbonRowSchema(org.apache.carbondata.core.indexstore.schema.CarbonRowSchema)

Example 15 with IndexRow

use of org.apache.carbondata.core.indexstore.row.IndexRow in project carbondata by apache.

The class CarbonUtil, method getMinMaxValue.

/**
 * Extracts the min/max byte arrays stored in the nested row at the given index
 * of an index row.
 *
 * @param row   index row holding a nested min/max row
 * @param index ordinal of the nested min/max row within {@code row}
 * @return one byte array per column of the nested row
 */
public static byte[][] getMinMaxValue(IndexRow row, int index) {
    IndexRow nested = row.getRow(index);
    int columnCount = nested.getColumnCount();
    byte[][] values = new byte[columnCount][];
    int col = 0;
    while (col < columnCount) {
        values[col] = nested.getByteArray(col);
        col++;
    }
    return values;
}
Also used : IndexRow(org.apache.carbondata.core.indexstore.row.IndexRow)

Aggregations

IndexRow (org.apache.carbondata.core.indexstore.row.IndexRow)15 CarbonRowSchema (org.apache.carbondata.core.indexstore.schema.CarbonRowSchema)6 IndexRowImpl (org.apache.carbondata.core.indexstore.row.IndexRowImpl)5 IOException (java.io.IOException)3 UnsupportedEncodingException (java.io.UnsupportedEncodingException)2 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 DataOutput (java.io.DataOutput)1 DataOutputStream (java.io.DataOutputStream)1 ByteBuffer (java.nio.ByteBuffer)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 Blocklet (org.apache.carbondata.core.indexstore.Blocklet)1 ExtendedBlocklet (org.apache.carbondata.core.indexstore.ExtendedBlocklet)1 UnsafeIndexRow (org.apache.carbondata.core.indexstore.row.UnsafeIndexRow)1 BlockletInfo (org.apache.carbondata.core.metadata.blocklet.BlockletInfo)1 BlockletMinMaxIndex (org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex)1 FilterExecutor (org.apache.carbondata.core.scan.filter.executer.FilterExecutor)1 ImplicitColumnFilterExecutor (org.apache.carbondata.core.scan.filter.executer.ImplicitColumnFilterExecutor)1