Use of org.apache.carbondata.core.indexstore.schema.CarbonRowSchema in project carbondata by apache.
Class BlockletDataMap, method addMinMax.
private DataMapRow addMinMax(int[] minMaxLen, CarbonRowSchema carbonRowSchema,
    byte[][] minValues) {
  CarbonRowSchema[] minSchemas =
      ((CarbonRowSchema.StructCarbonRowSchema) carbonRowSchema).getChildSchemas();
  DataMapRow minRow = new DataMapRowImpl(minSchemas);
  int minOrdinal = 0;
  // min value adding
  for (int i = 0; i < minMaxLen.length; i++) {
    minRow.setByteArray(minValues[i], minOrdinal++);
  }
  return minRow;
}
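For context, a minimal sketch of how addMinMax might be invoked, assuming a struct schema built the same way getMinMaxSchema (below) builds one; the column value sizes and the contents of minValues are made-up illustrative data, not taken from the CarbonData sources:

// Illustrative only: two columns, the first with a fixed 8-byte value, the second variable length.
int[] minMaxLen = new int[] { 8, -1 };
CarbonRowSchema[] children = new CarbonRowSchema[minMaxLen.length];
for (int i = 0; i < minMaxLen.length; i++) {
  children[i] = minMaxLen[i] <= 0
      ? new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY)
      : new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.BYTE_ARRAY, minMaxLen[i]);
}
CarbonRowSchema minSchema =
    new CarbonRowSchema.StructCarbonRowSchema(DataTypes.createDefaultStructType(), children);
// minValues would normally come from a BlockletMinMaxIndex; here it is dummy data
byte[][] minValues = new byte[][] { new byte[8], new byte[] { 1, 2, 3 } };
DataMapRow minRow = addMinMax(minMaxLen, minSchema, minValues);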
Use of org.apache.carbondata.core.indexstore.schema.CarbonRowSchema in project carbondata by apache.
Class BlockletDataMap, method getMinMaxSchema.
private void getMinMaxSchema(SegmentProperties segmentProperties,
    List<CarbonRowSchema> minMaxSchemas) {
  // Index key
  int[] minMaxLen = segmentProperties.getColumnsValueSize();
  // do it 2 times, one for min and one for max.
  for (int k = 0; k < 2; k++) {
    CarbonRowSchema[] mapSchemas = new CarbonRowSchema[minMaxLen.length];
    for (int i = 0; i < minMaxLen.length; i++) {
      if (minMaxLen[i] <= 0) {
        mapSchemas[i] = new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY);
      } else {
        mapSchemas[i] = new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.BYTE_ARRAY, minMaxLen[i]);
      }
    }
    CarbonRowSchema mapSchema =
        new CarbonRowSchema.StructCarbonRowSchema(DataTypes.createDefaultStructType(), mapSchemas);
    minMaxSchemas.add(mapSchema);
  }
}
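A short usage sketch, assuming an initialized SegmentProperties instance named segmentProperties: the method appends two StructCarbonRowSchema entries to the list, index 0 for min values and index 1 for max values, each holding one child schema per column.

List<CarbonRowSchema> minMaxSchemas = new ArrayList<>();
getMinMaxSchema(segmentProperties, minMaxSchemas);
// one struct schema for min values, one for max values
assert minMaxSchemas.size() == 2;
CarbonRowSchema[] minChildren =
    ((CarbonRowSchema.StructCarbonRowSchema) minMaxSchemas.get(0)).getChildSchemas();
// one child schema per column value size reported by the segment
assert minChildren.length == segmentProperties.getColumnsValueSize().length;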
Use of org.apache.carbondata.core.indexstore.schema.CarbonRowSchema in project carbondata by apache.
Class BlockletDataMap, method loadToUnsafeBlock.
/**
 * Load information for the block. This case can happen only for old stores
 * where blocklet information is not available in the index file. So load only block
 * information and read blocklet information in the executor.
 */
private DataMapRowImpl loadToUnsafeBlock(DataFileFooter fileFooter,
    SegmentProperties segmentProperties, String filePath, DataMapRowImpl summaryRow,
    BlockMetaInfo blockMetaInfo) {
  int[] minMaxLen = segmentProperties.getColumnsValueSize();
  BlockletIndex blockletIndex = fileFooter.getBlockletIndex();
  CarbonRowSchema[] schema = unsafeMemoryDMStore.getSchema();
  // Add one row to maintain task level min max for segment pruning
  if (summaryRow == null) {
    summaryRow = new DataMapRowImpl(unsafeMemorySummaryDMStore.getSchema());
  }
  DataMapRow row = new DataMapRowImpl(schema);
  int ordinal = 0;
  int taskMinMaxOrdinal = 0;
  // add start key as index key
  row.setByteArray(blockletIndex.getBtreeIndex().getStartKey(), ordinal++);
  BlockletMinMaxIndex minMaxIndex = blockletIndex.getMinMaxIndex();
  byte[][] minValues = updateMinValues(minMaxIndex.getMinValues(), minMaxLen);
  byte[][] maxValues = updateMaxValues(minMaxIndex.getMaxValues(), minMaxLen);
  // update min max values in case of old store
  byte[][] updatedMinValues =
      CarbonUtil.updateMinMaxValues(fileFooter, maxValues, minValues, true);
  byte[][] updatedMaxValues =
      CarbonUtil.updateMinMaxValues(fileFooter, maxValues, minValues, false);
  row.setRow(addMinMax(minMaxLen, schema[ordinal], updatedMinValues), ordinal);
  // compute and set task level min values
  addTaskMinMaxValues(summaryRow, minMaxLen,
      unsafeMemorySummaryDMStore.getSchema()[taskMinMaxOrdinal], updatedMinValues,
      TASK_MIN_VALUES_INDEX, true);
  ordinal++;
  taskMinMaxOrdinal++;
  row.setRow(addMinMax(minMaxLen, schema[ordinal], updatedMaxValues), ordinal);
  // compute and set task level max values
  addTaskMinMaxValues(summaryRow, minMaxLen,
      unsafeMemorySummaryDMStore.getSchema()[taskMinMaxOrdinal], updatedMaxValues,
      TASK_MAX_VALUES_INDEX, false);
  ordinal++;
  row.setInt((int) fileFooter.getNumberOfRows(), ordinal++);
  // add file path
  byte[] filePathBytes = filePath.getBytes(CarbonCommonConstants.DEFAULT_CHARSET_CLASS);
  row.setByteArray(filePathBytes, ordinal++);
  // add pages
  row.setShort((short) 0, ordinal++);
  // add version number
  row.setShort(fileFooter.getVersionId().number(), ordinal++);
  // add schema updated time
  row.setLong(fileFooter.getSchemaUpdatedTimeStamp(), ordinal++);
  // add blocklet info
  row.setByteArray(new byte[0], ordinal++);
  row.setLong(fileFooter.getBlockInfo().getTableBlockInfo().getBlockOffset(), ordinal++);
  try {
    setLocations(blockMetaInfo.getLocationInfo(), row, ordinal);
    ordinal++;
    // for relative blocklet id. Value is -1 because in case of old store blocklet info will
    // not be present in the index file and in that case we will not know the total number of
    // blocklets
    row.setShort((short) -1, ordinal++);
    // store block size
    row.setLong(blockMetaInfo.getSize(), ordinal);
    unsafeMemoryDMStore.addIndexRowToUnsafe(row);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
  return summaryRow;
}
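An illustrative calling pattern, not taken from the CarbonData sources: for an old store, one block-level row is loaded per file footer, threading the same summary row through every call. The names footers, segmentProperties and blockMetaInfoMap are assumptions about the surrounding state.

// Hypothetical driver loop for old-store segments
DataMapRowImpl summaryRow = null;
for (DataFileFooter footer : footers) {
  String filePath = footer.getBlockInfo().getTableBlockInfo().getFilePath();
  summaryRow = loadToUnsafeBlock(footer, segmentProperties, filePath, summaryRow,
      blockMetaInfoMap.get(filePath));
}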
Use of org.apache.carbondata.core.indexstore.schema.CarbonRowSchema in project carbondata by apache.
Class BlockIndex, method loadBlockMetaInfo.
/**
 * Method to load block metadata information
 *
 * @param taskSummarySchema schema of the task level summary row
 * @param segmentProperties properties of the segment being loaded
 * @param blockletIndexModel model holding the block meta info map
 * @param indexInfo list of file footers read from the index file
 * @return the populated task level summary row
 */
private IndexRowImpl loadBlockMetaInfo(CarbonRowSchema[] taskSummarySchema,
    SegmentProperties segmentProperties, BlockletIndexModel blockletIndexModel,
    List<DataFileFooter> indexInfo) {
  String tempFilePath = null;
  DataFileFooter previousDataFileFooter = null;
  int footerCounter = 0;
  byte[][] blockMinValues = null;
  byte[][] blockMaxValues = null;
  IndexRowImpl summaryRow = null;
  List<Short> blockletCountInEachBlock = new ArrayList<>(indexInfo.size());
  short totalBlockletsInOneBlock = 0;
  boolean isLastFileFooterEntryNeedToBeAdded = false;
  CarbonRowSchema[] schema = getFileFooterEntrySchema();
  // flag for each block entry
  boolean[] minMaxFlag = new boolean[segmentProperties.getNumberOfColumns()];
  Arrays.fill(minMaxFlag, true);
  // min max flag for task summary
  boolean[] taskSummaryMinMaxFlag = new boolean[segmentProperties.getNumberOfColumns()];
  Arrays.fill(taskSummaryMinMaxFlag, true);
  long totalRowCount = 0;
  for (DataFileFooter fileFooter : indexInfo) {
    TableBlockInfo blockInfo = fileFooter.getBlockInfo();
    BlockMetaInfo blockMetaInfo =
        blockletIndexModel.getBlockMetaInfoMap().get(blockInfo.getFilePath());
    footerCounter++;
    if (blockMetaInfo != null) {
      // a block entry is added every time a unique block path is encountered
      if (null == tempFilePath) {
        tempFilePath = blockInfo.getFilePath();
        // 1st time assign the min and max values from the current file footer
        blockMinValues = fileFooter.getBlockletIndex().getMinMaxIndex().getMinValues();
        blockMaxValues = fileFooter.getBlockletIndex().getMinMaxIndex().getMaxValues();
        updateMinMaxFlag(fileFooter, minMaxFlag);
        updateMinMaxFlag(fileFooter, taskSummaryMinMaxFlag);
        previousDataFileFooter = fileFooter;
        totalBlockletsInOneBlock++;
      } else if (blockInfo.getFilePath().equals(tempFilePath)) {
        // while iterating over the blocklets that belong to one block we need to compute the
        // min and max at block level. So compare min and max values and update if required
        BlockletMinMaxIndex currentFooterMinMaxIndex =
            fileFooter.getBlockletIndex().getMinMaxIndex();
        blockMinValues = compareAndUpdateMinMax(currentFooterMinMaxIndex.getMinValues(),
            blockMinValues, true, fileFooter.getColumnInTable());
        blockMaxValues = compareAndUpdateMinMax(currentFooterMinMaxIndex.getMaxValues(),
            blockMaxValues, false, fileFooter.getColumnInTable());
        updateMinMaxFlag(fileFooter, minMaxFlag);
        updateMinMaxFlag(fileFooter, taskSummaryMinMaxFlag);
        totalBlockletsInOneBlock++;
      }
      // add a row for the previous block once a new block path is encountered in the task.
      // OR condition is to handle the loading of the last file footer
      if (!blockInfo.getFilePath().equals(tempFilePath) || footerCounter == indexInfo.size()) {
        TableBlockInfo previousBlockInfo = previousDataFileFooter.getBlockInfo();
        summaryRow = loadToUnsafeBlock(schema, taskSummarySchema, previousDataFileFooter,
            segmentProperties, getMinMaxCacheColumns(), previousBlockInfo.getFilePath(),
            summaryRow,
            blockletIndexModel.getBlockMetaInfoMap().get(previousBlockInfo.getFilePath()),
            blockMinValues, blockMaxValues, minMaxFlag);
        totalRowCount += previousDataFileFooter.getNumberOfRows();
        minMaxFlag = new boolean[segmentProperties.getNumberOfColumns()];
        Arrays.fill(minMaxFlag, true);
        // flag to check whether the last file footer entry is different from the previous
        // entry. If yes then it needs to be added at the end
        isLastFileFooterEntryNeedToBeAdded =
            (footerCounter == indexInfo.size()) && (!blockInfo.getFilePath().equals(tempFilePath));
        // assign local variables values using the current file footer
        tempFilePath = blockInfo.getFilePath();
        blockMinValues = fileFooter.getBlockletIndex().getMinMaxIndex().getMinValues();
        blockMaxValues = fileFooter.getBlockletIndex().getMinMaxIndex().getMaxValues();
        updateMinMaxFlag(fileFooter, minMaxFlag);
        updateMinMaxFlag(fileFooter, taskSummaryMinMaxFlag);
        previousDataFileFooter = fileFooter;
        blockletCountInEachBlock.add(totalBlockletsInOneBlock);
        // for the next block the count will start from 1 because a row is created whenever a
        // new file path comes. Here a new file path has already come, so the count starts from 1
        totalBlockletsInOneBlock = 1;
      }
    }
  }
  // add the last file footer entry
  if (isLastFileFooterEntryNeedToBeAdded) {
    summaryRow = loadToUnsafeBlock(schema, taskSummarySchema, previousDataFileFooter,
        segmentProperties, getMinMaxCacheColumns(),
        previousDataFileFooter.getBlockInfo().getFilePath(), summaryRow,
        blockletIndexModel.getBlockMetaInfoMap()
            .get(previousDataFileFooter.getBlockInfo().getFilePath()),
        blockMinValues, blockMaxValues, minMaxFlag);
    totalRowCount += previousDataFileFooter.getNumberOfRows();
    blockletCountInEachBlock.add(totalBlockletsInOneBlock);
  }
  byte[] blockletCount = convertRowCountFromShortToByteArray(blockletCountInEachBlock);
  // set the total row count
  summaryRow.setLong(totalRowCount, TASK_ROW_COUNT);
  // blocklet count index is the last index
  summaryRow.setByteArray(blockletCount, taskSummarySchema.length - 1);
  setMinMaxFlagForTaskSummary(summaryRow, taskSummarySchema, segmentProperties,
      taskSummaryMinMaxFlag);
  return summaryRow;
}
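To make the bookkeeping easier to follow, here is a hypothetical trace, assuming three footers where the first two belong to one carbondata file and the third to another (the file names partA and partB are made up):

// footer 1 (partA): tempFilePath = partA, block min/max seeded, totalBlockletsInOneBlock = 1
// footer 2 (partA): same path, block min/max merged, totalBlockletsInOneBlock = 2
// footer 3 (partB): path changed -> loadToUnsafeBlock adds the row for partA,
//                   blockletCountInEachBlock = [2], isLastFileFooterEntryNeedToBeAdded = true,
//                   state re-seeded from footer 3, totalBlockletsInOneBlock = 1
// after the loop : last-entry branch adds the row for partB, blockletCountInEachBlock = [2, 1]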
Use of org.apache.carbondata.core.indexstore.schema.CarbonRowSchema in project carbondata by apache.
Class BlockletIndex, method loadBlockletMetaInfo.
/**
 * Method to load blocklet metadata information
 *
 * @param taskSummarySchema schema of the task level summary row
 * @param segmentProperties properties of the segment being loaded
 * @param blockletIndexModel model holding the block meta info map
 * @param indexInfo list of file footers read from the index file
 * @return the populated task level summary row
 */
private IndexRowImpl loadBlockletMetaInfo(CarbonRowSchema[] taskSummarySchema,
    SegmentProperties segmentProperties, BlockletIndexModel blockletIndexModel,
    List<DataFileFooter> indexInfo) {
  String tempFilePath = null;
  IndexRowImpl summaryRow = null;
  CarbonRowSchema[] schema = getFileFooterEntrySchema();
  boolean[] summaryRowMinMaxFlag = new boolean[segmentProperties.getNumberOfColumns()];
  Arrays.fill(summaryRowMinMaxFlag, true);
  // Relative blocklet ID is the id assigned to a blocklet within a part file
  int relativeBlockletId = 0;
  for (DataFileFooter fileFooter : indexInfo) {
    // update the min max flag for the summary row
    updateMinMaxFlag(fileFooter, summaryRowMinMaxFlag);
    TableBlockInfo blockInfo = fileFooter.getBlockInfo();
    BlockMetaInfo blockMetaInfo =
        blockletIndexModel.getBlockMetaInfoMap().get(blockInfo.getFilePath());
    // blockMetaInfo is null when the file does not exist physically; skip such entries
    if (blockMetaInfo != null) {
      // blocklet ID will start from 0 again only when the part file path changes
      if (null == tempFilePath || !tempFilePath.equals(blockInfo.getFilePath())) {
        tempFilePath = blockInfo.getFilePath();
        relativeBlockletId = 0;
        blockNum++;
      }
      summaryRow = loadToUnsafe(schema, taskSummarySchema, fileFooter, segmentProperties,
          getMinMaxCacheColumns(), blockInfo.getFilePath(), summaryRow, blockMetaInfo,
          relativeBlockletId);
      // the relative blocklet id needs to be incremented by the total number of blocklets
      // present in the current file footer
      relativeBlockletId += fileFooter.getBlockletList().size();
    }
  }
  summaryRow.setLong(0L, TASK_ROW_COUNT);
  setMinMaxFlagForTaskSummary(summaryRow, taskSummarySchema, segmentProperties,
      summaryRowMinMaxFlag);
  return summaryRow;
}
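A similar hypothetical trace for the relative blocklet ID handling, assuming footers over two part files (the part file names and blocklet counts below are made up):

// footer 1 (part0, 3 blocklets): new path  -> relativeBlockletId reset to 0, blockNum = 1,
//                                loadToUnsafe(..., relativeBlockletId = 0), then id += 3
// footer 2 (part0, 2 blocklets): same path -> loadToUnsafe(..., relativeBlockletId = 3), then id += 2
// footer 3 (part1, 4 blocklets): new path  -> relativeBlockletId reset to 0, blockNum = 2,
//                                loadToUnsafe(..., relativeBlockletId = 0), then id += 4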