Search in sources:

Example 16 with CarbonRowSchema

Use of org.apache.carbondata.core.indexstore.schema.CarbonRowSchema in the Apache CarbonData project.

Source: class BlockIndex, method addTaskMinMaxValues.

/**
 * Computes and stores task-level min/max values for the given ordinal.
 * On the first call for a task the incoming block-level values are stored
 * as-is; on subsequent calls they are merged with the previously recorded
 * task-level values.
 *
 * @param taskMinMaxRow        index row holding the task-level min/max values
 * @param carbonRowSchema      schemas describing the index row layout
 * @param taskMinMaxOrdinal    ordinal of the task-level min/max struct schema
 * @param minMaxValue          min/max values of the current block, one entry per column
 * @param ordinal              position of the nested min/max row inside taskMinMaxRow
 * @param isMinValueComparison true when comparing minimum values, false for maximum
 * @param columnSchemaList     column schemas used while comparing the values
 */
protected void addTaskMinMaxValues(IndexRow taskMinMaxRow, CarbonRowSchema[] carbonRowSchema, int taskMinMaxOrdinal, byte[][] minMaxValue, int ordinal, boolean isMinValueComparison, List<ColumnSchema> columnSchemaList) {
    IndexRow nestedRow = taskMinMaxRow.getRow(ordinal);
    byte[][] valuesToWrite;
    if (nestedRow == null) {
        // First block seen for this task: create the nested row from the
        // struct's child schemas and take the incoming values unchanged.
        CarbonRowSchema[] childSchemas = ((CarbonRowSchema.StructCarbonRowSchema) carbonRowSchema[taskMinMaxOrdinal]).getChildSchemas();
        nestedRow = new IndexRowImpl(childSchemas);
        valuesToWrite = minMaxValue;
    } else {
        // Merge the incoming block values with the values already recorded
        // for this task (picking min or max per column as requested).
        byte[][] existingValues = getMinMaxValue(taskMinMaxRow, ordinal);
        valuesToWrite = compareAndUpdateMinMax(minMaxValue, existingValues, isMinValueComparison, columnSchemaList);
    }
    // Write each column's min/max bytes at consecutive ordinals of the nested row.
    for (int pos = 0; pos < valuesToWrite.length; pos++) {
        nestedRow.setByteArray(valuesToWrite[pos], pos);
    }
    taskMinMaxRow.setRow(nestedRow, ordinal);
}
Also used : IndexRow(org.apache.carbondata.core.indexstore.row.IndexRow) IndexRowImpl(org.apache.carbondata.core.indexstore.row.IndexRowImpl) CarbonRowSchema(org.apache.carbondata.core.indexstore.schema.CarbonRowSchema)

Example 17 with CarbonRowSchema

Use of org.apache.carbondata.core.indexstore.schema.CarbonRowSchema in the Apache CarbonData project.

Source: class BlockIndex, method prune.

/**
 * Prunes the blocklets of this block against the given filter.
 * When no filter is supplied every entry in the in-memory store is returned;
 * otherwise each entry's min/max values are evaluated against the filter and
 * only matching blocklets are collected. When the ExplainCollector is enabled,
 * total and hit blocklet counts are reported for pruning statistics.
 *
 * @param filterExp         filter resolver tree; null means "no filter, return all"
 * @param filterExecutor    pre-built filter executor; rebuilt locally when the
 *                          passed segment properties do not match this index
 * @param segmentProperties segment properties used to validate the executor
 * @return list of blocklets that survive min/max pruning (possibly empty)
 */
private List<Blocklet> prune(FilterResolverIntf filterExp, FilterExecutor filterExecutor, SegmentProperties segmentProperties) {
    // Nothing stored for this block: no blocklets to return.
    if (memoryDMStore.getRowCount() == 0) {
        return new ArrayList<>();
    }
    List<Blocklet> blocklets = new ArrayList<>();
    CarbonRowSchema[] schema = getFileFooterEntrySchema();
    String filePath = getFilePath();
    int numEntries = memoryDMStore.getRowCount();
    int totalBlocklets = 0;
    // Only pay for the total-blocklet count when explain output is requested.
    if (ExplainCollector.enabled()) {
        totalBlocklets = getTotalBlocklets();
    }
    int hitBlocklets = 0;
    if (filterExp == null) {
        // No filter: every stored entry becomes a result blocklet.
        for (int i = 0; i < numEntries; i++) {
            IndexRow indexRow = memoryDMStore.getIndexRow(schema, i);
            blocklets.add(createBlocklet(indexRow, getFileNameWithFilePath(indexRow, filePath), getBlockletId(indexRow), false));
        }
        hitBlocklets = totalBlocklets;
    } else {
        // Remove B-tree jump logic as start and end key prepared is not
        // correct for old store scenarios
        int entryIndex = 0;
        // flag to be used for deciding whether use min/max in executor pruning for BlockletIndex
        boolean useMinMaxForPruning = useMinMaxForExecutorPruning(filterExp);
        // If the caller's segment properties do not match this index, rebuild
        // the filter executor tree against this index's own segment properties.
        if (!validateSegmentProperties(segmentProperties)) {
            filterExecutor = FilterUtil.getFilterExecutorTree(filterExp, getSegmentProperties(), null, getMinMaxCacheColumns(), false);
        }
        // min and max for executor pruning
        while (entryIndex < numEntries) {
            IndexRow row = memoryDMStore.getIndexRow(schema, entryIndex);
            boolean[] minMaxFlag = getMinMaxFlag(row, BLOCK_MIN_MAX_FLAG);
            String fileName = getFileNameWithFilePath(row, filePath);
            short blockletId = getBlockletId(row);
            // Evaluate the filter against this entry's stored max/min values.
            boolean isValid = addBlockBasedOnMinMaxValue(filterExecutor, getMinMaxValue(row, MAX_VALUES_INDEX), getMinMaxValue(row, MIN_VALUES_INDEX), minMaxFlag, fileName, blockletId);
            if (isValid) {
                blocklets.add(createBlocklet(row, fileName, blockletId, useMinMaxForPruning));
                if (ExplainCollector.enabled()) {
                    hitBlocklets += getBlockletNumOfEntry(entryIndex);
                }
            }
            entryIndex++;
        }
    }
    // Report pruning statistics (total vs. hit blocklets) for EXPLAIN output.
    if (ExplainCollector.enabled()) {
        ExplainCollector.setShowPruningInfo(true);
        ExplainCollector.addTotalBlocklets(totalBlocklets);
        ExplainCollector.addTotalBlocks(getTotalBlocks());
        ExplainCollector.addDefaultIndexPruningHit(hitBlocklets);
    }
    return blocklets;
}
Also used : ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet) Blocklet(org.apache.carbondata.core.indexstore.Blocklet) IndexRow(org.apache.carbondata.core.indexstore.row.IndexRow) ArrayList(java.util.ArrayList) CarbonRowSchema(org.apache.carbondata.core.indexstore.schema.CarbonRowSchema)

Example 18 with CarbonRowSchema

Use of org.apache.carbondata.core.indexstore.schema.CarbonRowSchema in the Apache CarbonData project.

Source: class UnsafeMemoryDMStore, method addIndexRow.

/**
 * Add the index row to unsafe.
 * Below format is used to store data in memory block
 * WRITE:
 * <FD><FD><FD><VO><VO><VO><LO><VD><VD><VD>
 * FD: Fixed Column data
 * VO: Variable column data offset
 * VD: Variable column data
 * LO: Last Offset
 *
 * Read:
 * FD: Read directly based of byte position added in CarbonRowSchema
 *
 * VD: Read based on below logic
 * if not last variable column schema
 * X = read actual variable column offset based on byte position added in CarbonRowSchema
 * Y = read next variable column offset (next 4 bytes)
 * get the length
 * len  = (X-Y)
 * read data from offset X of size len
 *
 * if last variable column
 * X = read actual variable column offset based on byte position added in CarbonRowSchema
 * Y = read last offset (next 4 bytes)
 * get the length
 * len  = (X-Y)
 * read data from offset X of size len
 *
 * @param indexRow the row to serialize into the unsafe memory block
 */
public void addIndexRow(CarbonRowSchema[] schema, IndexRow indexRow) {
    // First calculate the required memory to keep the row in unsafe
    int rowSize = indexRow.getTotalSizeInBytes();
    // Check whether allocated memory is sufficient or not.
    ensureSize(rowSize);
    // 'pointer' marks where this row begins inside the memory block.
    int pointer = runningLength;
    // Find the largest byte position across all schemas (flattening STRUCT
    // children) — the last-offset slot is written immediately after it.
    int bytePosition = 0;
    for (CarbonRowSchema carbonRowSchema : schema) {
        if (carbonRowSchema.getSchemaType() == CarbonRowSchema.IndexSchemaType.STRUCT) {
            CarbonRowSchema[] childSchemas = ((CarbonRowSchema.StructCarbonRowSchema) carbonRowSchema).getChildSchemas();
            for (int j = 0; j < childSchemas.length; j++) {
                if (childSchemas[j].getBytePosition() > bytePosition) {
                    bytePosition = childSchemas[j].getBytePosition();
                }
            }
        } else {
            if (carbonRowSchema.getBytePosition() > bytePosition) {
                bytePosition = carbonRowSchema.getBytePosition();
            }
        }
    }
    // byte position of Last offset
    bytePosition += CarbonCommonConstants.INT_SIZE_IN_BYTE;
    // start byte position of variable length data
    int varColPosition = bytePosition + CarbonCommonConstants.INT_SIZE_IN_BYTE;
    // current position refers to current byte position in memory block
    int currentPosition;
    // Write each column; variable-length columns advance varColPosition,
    // fixed-length columns return <= 0 from addToUnsafe and leave it untouched.
    for (int i = 0; i < schema.length; i++) {
        switch(schema[i].getSchemaType()) {
            case STRUCT:
                CarbonRowSchema[] childSchemas = ((CarbonRowSchema.StructCarbonRowSchema) schema[i]).getChildSchemas();
                IndexRow row = indexRow.getRow(i);
                for (int j = 0; j < childSchemas.length; j++) {
                    currentPosition = addToUnsafe(childSchemas[j], row, j, pointer, varColPosition);
                    if (currentPosition > 0) {
                        varColPosition = currentPosition;
                    }
                }
                break;
            default:
                currentPosition = addToUnsafe(schema[i], indexRow, i, pointer, varColPosition);
                if (currentPosition > 0) {
                    varColPosition = currentPosition;
                }
                break;
        }
    }
    // writing the last offset
    getUnsafe().putInt(memoryBlock.getBaseObject(), memoryBlock.getBaseOffset() + pointer + bytePosition, varColPosition);
    // after adding last offset increment the length by 4 bytes as last position
    // written as INT
    runningLength += CarbonCommonConstants.INT_SIZE_IN_BYTE;
    // Record this row's start pointer so it can be read back by row index.
    pointers[rowCount++] = pointer;
}
Also used : UnsafeIndexRow(org.apache.carbondata.core.indexstore.row.UnsafeIndexRow) IndexRow(org.apache.carbondata.core.indexstore.row.IndexRow) CarbonRowSchema(org.apache.carbondata.core.indexstore.schema.CarbonRowSchema)

Aggregations

CarbonRowSchema (org.apache.carbondata.core.indexstore.schema.CarbonRowSchema)18 IndexRow (org.apache.carbondata.core.indexstore.row.IndexRow)6 IndexRowImpl (org.apache.carbondata.core.indexstore.row.IndexRowImpl)6 DataMapRow (org.apache.carbondata.core.indexstore.row.DataMapRow)5 ArrayList (java.util.ArrayList)4 DataMapRowImpl (org.apache.carbondata.core.indexstore.row.DataMapRowImpl)4 DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter)3 BlockletMinMaxIndex (org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex)3 IOException (java.io.IOException)2 UnsupportedEncodingException (java.io.UnsupportedEncodingException)2 TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo)2 BlockMetaInfo (org.apache.carbondata.core.indexstore.BlockMetaInfo)2 UnsafeMemoryDMStore (org.apache.carbondata.core.indexstore.UnsafeMemoryDMStore)2 MemoryException (org.apache.carbondata.core.memory.MemoryException)2 DataType (org.apache.carbondata.core.metadata.datatype.DataType)2 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 DataOutput (java.io.DataOutput)1 DataOutputStream (java.io.DataOutputStream)1 HashMap (java.util.HashMap)1 SegmentProperties (org.apache.carbondata.core.datastore.block.SegmentProperties)1