Search in sources :

Example 1 with IndexRowImpl

use of org.apache.carbondata.core.indexstore.row.IndexRowImpl in project carbondata by apache.

the class BlockIndex method loadBlockMetaInfo.

/**
 * Loads block-level metadata for one task and returns the task summary row.
 *
 * <p>The footers in {@code indexInfo} are visited in order. A footer is only processed when
 * {@code blockletIndexModel.getBlockMetaInfoMap()} holds an entry for its block file path.
 * Footers that share the currently tracked file path are merged: their min/max values are
 * combined through {@code compareAndUpdateMinMax} and a per-block blocklet counter is
 * incremented. When the file path changes, or the last footer of the list is reached, the
 * pending block is handed to {@code loadToUnsafeBlock} together with the merged min/max values.
 *
 * <p>After the loop the total row count, the per-block blocklet counts (converted to a byte
 * array) and the task-level min/max flags are written into the summary row.
 *
 * <p>NOTE(review): {@code summaryRow} is only assigned inside the
 * {@code blockMetaInfo != null} branch, so it is still {@code null} at the final
 * {@code setLong} call if no block entry was ever loaded — confirm callers never hit that case.
 *
 * @param taskSummarySchema schema of the task summary row; its last ordinal receives the
 *                          blocklet-count byte array
 * @param segmentProperties supplies the column count used to size the min/max flag arrays and
 *                          is passed on to {@code loadToUnsafeBlock}
 * @param blockletIndexModel provides the file-path to {@code BlockMetaInfo} map
 * @param indexInfo data file footers to index, iterated in list order
 * @return the summary row returned by the last {@code loadToUnsafeBlock} call, with row count,
 *         blocklet counts and min/max flags set
 */
private IndexRowImpl loadBlockMetaInfo(CarbonRowSchema[] taskSummarySchema, SegmentProperties segmentProperties, BlockletIndexModel blockletIndexModel, List<DataFileFooter> indexInfo) {
    // file path of the block currently being accumulated; null until the first usable footer
    String tempFilePath = null;
    // footer that is passed to loadToUnsafeBlock when the pending block is flushed
    DataFileFooter previousDataFileFooter = null;
    // 1-based position of the current footer in indexInfo (counts skipped footers too)
    int footerCounter = 0;
    byte[][] blockMinValues = null;
    byte[][] blockMaxValues = null;
    IndexRowImpl summaryRow = null;
    List<Short> blockletCountInEachBlock = new ArrayList<>(indexInfo.size());
    short totalBlockletsInOneBlock = 0;
    boolean isLastFileFooterEntryNeedToBeAdded = false;
    CarbonRowSchema[] schema = getFileFooterEntrySchema();
    // min max flag for each block entry; starts all-true and is re-created after every flush
    boolean[] minMaxFlag = new boolean[segmentProperties.getNumberOfColumns()];
    Arrays.fill(minMaxFlag, true);
    // min max flag for the task summary; updated by every processed footer and never reset
    boolean[] taskSummaryMinMaxFlag = new boolean[segmentProperties.getNumberOfColumns()];
    Arrays.fill(taskSummaryMinMaxFlag, true);
    long totalRowCount = 0;
    for (DataFileFooter fileFooter : indexInfo) {
        TableBlockInfo blockInfo = fileFooter.getBlockInfo();
        BlockMetaInfo blockMetaInfo = blockletIndexModel.getBlockMetaInfoMap().get(blockInfo.getFilePath());
        footerCounter++;
        // footers without a BlockMetaInfo entry for their file path are skipped entirely
        if (blockMetaInfo != null) {
            // tempFilePath tracks the block being accumulated so that an entry is added each time
            // a new block path is encountered
            if (null == tempFilePath) {
                tempFilePath = blockInfo.getFilePath();
                // 1st time assign the min and max values from the current file footer
                blockMinValues = fileFooter.getBlockletIndex().getMinMaxIndex().getMinValues();
                blockMaxValues = fileFooter.getBlockletIndex().getMinMaxIndex().getMaxValues();
                updateMinMaxFlag(fileFooter, minMaxFlag);
                updateMinMaxFlag(fileFooter, taskSummaryMinMaxFlag);
                previousDataFileFooter = fileFooter;
                totalBlockletsInOneBlock++;
            } else if (blockInfo.getFilePath().equals(tempFilePath)) {
                // Another footer of the same block: fold its min and max values into the running
                // block-level min and max, updating them only where required
                BlockletMinMaxIndex currentFooterMinMaxIndex = fileFooter.getBlockletIndex().getMinMaxIndex();
                blockMinValues = compareAndUpdateMinMax(currentFooterMinMaxIndex.getMinValues(), blockMinValues, true, fileFooter.getColumnInTable());
                blockMaxValues = compareAndUpdateMinMax(currentFooterMinMaxIndex.getMaxValues(), blockMaxValues, false, fileFooter.getColumnInTable());
                updateMinMaxFlag(fileFooter, minMaxFlag);
                updateMinMaxFlag(fileFooter, taskSummaryMinMaxFlag);
                totalBlockletsInOneBlock++;
            }
            // Flush the pending block when the file path changes. The OR condition handles the
            // last file footer of the list, which has no following footer to trigger the flush.
            // NOTE(review): this check sits inside the blockMetaInfo != null branch, so a trailing
            // footer without BlockMetaInfo leaves the pending block unflushed — confirm intended.
            if (!blockInfo.getFilePath().equals(tempFilePath) || footerCounter == indexInfo.size()) {
                TableBlockInfo previousBlockInfo = previousDataFileFooter.getBlockInfo();
                summaryRow = loadToUnsafeBlock(schema, taskSummarySchema, previousDataFileFooter, segmentProperties, getMinMaxCacheColumns(), previousBlockInfo.getFilePath(), summaryRow, blockletIndexModel.getBlockMetaInfoMap().get(previousBlockInfo.getFilePath()), blockMinValues, blockMaxValues, minMaxFlag);
                totalRowCount += previousDataFileFooter.getNumberOfRows();
                // start the next block with a fresh all-true flag array
                minMaxFlag = new boolean[segmentProperties.getNumberOfColumns()];
                Arrays.fill(minMaxFlag, true);
                // flag to check whether last file footer entry is different from previous entry.
                // If yes then it needs to be added after the loop. Must be evaluated before
                // tempFilePath is overwritten below.
                isLastFileFooterEntryNeedToBeAdded = (footerCounter == indexInfo.size()) && (!blockInfo.getFilePath().equals(tempFilePath));
                // assign local variables values using the current file footer
                tempFilePath = blockInfo.getFilePath();
                blockMinValues = fileFooter.getBlockletIndex().getMinMaxIndex().getMinValues();
                blockMaxValues = fileFooter.getBlockletIndex().getMinMaxIndex().getMaxValues();
                updateMinMaxFlag(fileFooter, minMaxFlag);
                updateMinMaxFlag(fileFooter, taskSummaryMinMaxFlag);
                previousDataFileFooter = fileFooter;
                // record the blocklet count of the block that was just flushed
                blockletCountInEachBlock.add(totalBlockletsInOneBlock);
                // for next block count will start from 1 because a row is created whenever a new file
                // path comes. Here already a new file path has come so the count should start from 1
                totalBlockletsInOneBlock = 1;
            }
        }
    }
    // add the last file footer entry: it started a new block that the loop could not flush
    if (isLastFileFooterEntryNeedToBeAdded) {
        summaryRow = loadToUnsafeBlock(schema, taskSummarySchema, previousDataFileFooter, segmentProperties, getMinMaxCacheColumns(), previousDataFileFooter.getBlockInfo().getFilePath(), summaryRow, blockletIndexModel.getBlockMetaInfoMap().get(previousDataFileFooter.getBlockInfo().getFilePath()), blockMinValues, blockMaxValues, minMaxFlag);
        totalRowCount += previousDataFileFooter.getNumberOfRows();
        blockletCountInEachBlock.add(totalBlockletsInOneBlock);
    }
    byte[] blockletCount = convertRowCountFromShortToByteArray(blockletCountInEachBlock);
    // set the total row count accumulated over all flushed blocks
    summaryRow.setLong(totalRowCount, TASK_ROW_COUNT);
    // blocklet count index is the last index of the task summary schema
    summaryRow.setByteArray(blockletCount, taskSummarySchema.length - 1);
    setMinMaxFlagForTaskSummary(summaryRow, taskSummarySchema, segmentProperties, taskSummaryMinMaxFlag);
    return summaryRow;
}
Also used: TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo), ArrayList (java.util.ArrayList), IndexRowImpl (org.apache.carbondata.core.indexstore.row.IndexRowImpl), CarbonRowSchema (org.apache.carbondata.core.indexstore.schema.CarbonRowSchema), DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter), BlockletMinMaxIndex (org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex), BlockMetaInfo (org.apache.carbondata.core.indexstore.BlockMetaInfo)

Example 2 with IndexRowImpl

use of org.apache.carbondata.core.indexstore.row.IndexRowImpl in project carbondata by apache.

the class BlockletIndex method loadBlockletMetaInfo.

/**
 * Loads blocklet-level metadata for every footer in {@code indexInfo} and returns the task
 * summary row.
 *
 * <p>Every footer contributes to the summary min/max flags. Only footers whose block file path
 * has an entry in {@code blockletIndexModel.getBlockMetaInfoMap()} are passed on to
 * {@code loadToUnsafe}. The relative blocklet id restarts at 0 whenever the part file path
 * changes and otherwise advances by the number of blocklets in each loaded footer.
 *
 * @param taskSummarySchema schema of the task summary row
 * @param segmentProperties supplies the column count for the summary min/max flags
 * @param blockletIndexModel provides the file-path to {@code BlockMetaInfo} map
 * @param indexInfo data file footers to index, iterated in list order
 * @return the summary row produced by the last {@code loadToUnsafe} call, with
 *         {@code TASK_ROW_COUNT} set to 0 and the min/max flags applied
 */
private IndexRowImpl loadBlockletMetaInfo(CarbonRowSchema[] taskSummarySchema, SegmentProperties segmentProperties, BlockletIndexModel blockletIndexModel, List<DataFileFooter> indexInfo) {
    CarbonRowSchema[] footerEntrySchema = getFileFooterEntrySchema();
    boolean[] taskMinMaxFlag = new boolean[segmentProperties.getNumberOfColumns()];
    Arrays.fill(taskMinMaxFlag, true);

    IndexRowImpl taskSummaryRow = null;
    String currentPartFile = null;
    // id of the next blocklet, counted from the start of the current part file
    int nextBlockletId = 0;

    for (DataFileFooter footer : indexInfo) {
        // the summary flags take every footer into account, loaded or not
        updateMinMaxFlag(footer, taskMinMaxFlag);

        String partFile = footer.getBlockInfo().getFilePath();
        BlockMetaInfo metaInfo = blockletIndexModel.getBlockMetaInfoMap().get(partFile);
        if (metaInfo == null) {
            // no meta info registered for this file path, nothing to load
            continue;
        }

        boolean samePartFile = currentPartFile != null && currentPartFile.equals(partFile);
        if (!samePartFile) {
            // a new part file begins: blocklet ids restart from 0
            currentPartFile = partFile;
            nextBlockletId = 0;
            blockNum++;
        }

        taskSummaryRow = loadToUnsafe(footerEntrySchema, taskSummarySchema, footer, segmentProperties, getMinMaxCacheColumns(), partFile, taskSummaryRow, metaInfo, nextBlockletId);
        // advance past all blocklets contained in this footer
        nextBlockletId += footer.getBlockletList().size();
    }

    taskSummaryRow.setLong(0L, TASK_ROW_COUNT);
    setMinMaxFlagForTaskSummary(taskSummaryRow, taskSummarySchema, segmentProperties, taskMinMaxFlag);
    return taskSummaryRow;
}
Also used: TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo), DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter), IndexRowImpl (org.apache.carbondata.core.indexstore.row.IndexRowImpl), CarbonRowSchema (org.apache.carbondata.core.indexstore.schema.CarbonRowSchema), BlockMetaInfo (org.apache.carbondata.core.indexstore.BlockMetaInfo)

Example 3 with IndexRowImpl

use of org.apache.carbondata.core.indexstore.row.IndexRowImpl in project carbondata by apache.

the class BlockIndex method addMinMaxFlagValues.

/**
 * Builds a nested row holding the given min/max flags and stores it in {@code row}.
 *
 * @param row row that receives the nested flag row
 * @param carbonRowSchema struct schema whose child schemas describe the flag row
 * @param minMaxFlag flags to copy; element {@code i} is written at ordinal {@code i}
 * @param ordinal position in {@code row} at which the flag row is stored
 */
protected void addMinMaxFlagValues(IndexRow row, CarbonRowSchema carbonRowSchema, boolean[] minMaxFlag, int ordinal) {
    CarbonRowSchema.StructCarbonRowSchema structSchema = (CarbonRowSchema.StructCarbonRowSchema) carbonRowSchema;
    IndexRow flagRow = new IndexRowImpl(structSchema.getChildSchemas());
    // copy the flags one by one into consecutive ordinals
    int position = 0;
    for (boolean flag : minMaxFlag) {
        flagRow.setBoolean(flag, position);
        position++;
    }
    row.setRow(flagRow, ordinal);
}
Also used: IndexRow (org.apache.carbondata.core.indexstore.row.IndexRow), IndexRowImpl (org.apache.carbondata.core.indexstore.row.IndexRowImpl), CarbonRowSchema (org.apache.carbondata.core.indexstore.schema.CarbonRowSchema)

Example 4 with IndexRowImpl

use of org.apache.carbondata.core.indexstore.row.IndexRowImpl in project carbondata by apache.

the class BlockIndex method init.

/**
 * Initializes this index from the given model.
 *
 * <p>Uses the footers already present on the model when there are any; otherwise reads them
 * from the index file through a {@code DataFileFooterConverter}. When at least one footer is
 * available, the segment properties, memory schema and summary store are set up, the metadata
 * is loaded and written, and the store is serialized if the model requests it.
 *
 * @param indexModel model to load from; expected to be a {@code BlockletIndexModel}
 * @throws IOException declared by this method; propagated from the calls it makes
 */
@Override
public void init(IndexModel indexModel) throws IOException {
    final long loadStart = System.currentTimeMillis();
    assert (indexModel instanceof BlockletIndexModel);
    BlockletIndexModel blockletIndexModel = (BlockletIndexModel) indexModel;
    DataFileFooterConverter footerConverter = new DataFileFooterConverter(indexModel.getConfiguration());

    // reuse footers that were already read and converted, otherwise read them from the index file
    boolean footersAlreadyConverted = blockletIndexModel.getIndexInfos() != null && !blockletIndexModel.getIndexInfos().isEmpty();
    List<DataFileFooter> footers = footersAlreadyConverted
        ? blockletIndexModel.getIndexInfos()
        : footerConverter.getIndexInfo(blockletIndexModel.getFilePath(), blockletIndexModel.getFileData(), blockletIndexModel.getCarbonTable().isTransactionalTable());

    String path = blockletIndexModel.getFilePath();
    this.isPartitionTable = blockletIndexModel.getCarbonTable().isHivePartitionTable();

    // The file path is stored only for partition tables, non transactional tables, flat folder
    // structure, or when the index file does not live under the table path. If the segment data
    // is written in the table path there is no need to store the whole path of the file.
    boolean storeFullPath = this.isPartitionTable
        || !blockletIndexModel.getCarbonTable().isTransactionalTable()
        || blockletIndexModel.getCarbonTable().isSupportFlatFolder()
        || !blockletIndexModel.getFilePath().startsWith(blockletIndexModel.getCarbonTable().getTablePath());
    byte[] filePath;
    if (!storeFullPath) {
        filePath = new byte[0];
    } else {
        filePath = FilenameUtils.getFullPathNoEndSeparator(path).getBytes(CarbonCommonConstants.DEFAULT_CHARSET);
        isFilePathStored = true;
    }

    int fileNameStart = path.lastIndexOf("/") + 1;
    byte[] fileName = path.substring(fileNameStart).getBytes(CarbonCommonConstants.DEFAULT_CHARSET);
    byte[] segmentId = blockletIndexModel.getSegmentId().getBytes(CarbonCommonConstants.DEFAULT_CHARSET);

    if (!footers.isEmpty()) {
        // the first footer is used to initialize the segment properties
        SegmentProperties segmentProperties = initSegmentProperties(blockletIndexModel, footers.get(0));
        createMemorySchema(blockletIndexModel);
        createSummaryDMStore(blockletIndexModel);
        CarbonRowSchema[] taskSummarySchema = getTaskSummarySchema();
        // load the metadata and finish writing the summary row
        IndexRowImpl summaryRow = loadMetadata(taskSummarySchema, segmentProperties, blockletIndexModel, footers);
        finishWriting(taskSummarySchema, filePath, fileName, segmentId, summaryRow);
        if (blockletIndexModel.isSerializeDmStore()) {
            serializeDmStore();
        }
    }

    if (LOGGER.isDebugEnabled()) {
        LOGGER.debug("Time taken to load blocklet index from file : " + indexModel.getFilePath() + " is " + (System.currentTimeMillis() - loadStart));
    }
}
Also used: DataFileFooterConverter (org.apache.carbondata.core.util.DataFileFooterConverter), DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter), IndexRowImpl (org.apache.carbondata.core.indexstore.row.IndexRowImpl), SegmentProperties (org.apache.carbondata.core.datastore.block.SegmentProperties), CarbonRowSchema (org.apache.carbondata.core.indexstore.schema.CarbonRowSchema)

Example 5 with IndexRowImpl

use of org.apache.carbondata.core.indexstore.row.IndexRowImpl in project carbondata by apache.

the class BlockIndex method addMinMax.

/**
 * Creates a row from the child schemas of {@code carbonRowSchema} and fills it with the given
 * byte arrays.
 *
 * @param carbonRowSchema struct schema whose child schemas describe the new row
 * @param minValues values to copy; element {@code i} is written at ordinal {@code i}
 * @return the newly created and populated row
 */
protected IndexRow addMinMax(CarbonRowSchema carbonRowSchema, byte[][] minValues) {
    CarbonRowSchema.StructCarbonRowSchema structSchema = (CarbonRowSchema.StructCarbonRowSchema) carbonRowSchema;
    IndexRow valueRow = new IndexRowImpl(structSchema.getChildSchemas());
    // the array index doubles as the ordinal inside the row
    for (int ordinal = 0; ordinal < minValues.length; ordinal++) {
        valueRow.setByteArray(minValues[ordinal], ordinal);
    }
    return valueRow;
}
Also used: IndexRow (org.apache.carbondata.core.indexstore.row.IndexRow), IndexRowImpl (org.apache.carbondata.core.indexstore.row.IndexRowImpl), CarbonRowSchema (org.apache.carbondata.core.indexstore.schema.CarbonRowSchema)

Aggregations

IndexRowImpl (org.apache.carbondata.core.indexstore.row.IndexRowImpl): 8, CarbonRowSchema (org.apache.carbondata.core.indexstore.schema.CarbonRowSchema): 6, IndexRow (org.apache.carbondata.core.indexstore.row.IndexRow): 5, DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter): 3, IOException (java.io.IOException): 2, TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo): 2, BlockMetaInfo (org.apache.carbondata.core.indexstore.BlockMetaInfo): 2, BlockletMinMaxIndex (org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex): 2, ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1, DataOutput (java.io.DataOutput): 1, DataOutputStream (java.io.DataOutputStream): 1, UnsupportedEncodingException (java.io.UnsupportedEncodingException): 1, ArrayList (java.util.ArrayList): 1, SegmentProperties (org.apache.carbondata.core.datastore.block.SegmentProperties): 1, BlockletInfo (org.apache.carbondata.core.metadata.blocklet.BlockletInfo): 1, DataFileFooterConverter (org.apache.carbondata.core.util.DataFileFooterConverter): 1