
Example 1 with DataMapRowImpl

use of org.apache.carbondata.core.indexstore.row.DataMapRowImpl in project carbondata by apache.

the class BlockletDataMap method addMinMax.

private DataMapRow addMinMax(int[] minMaxLen, CarbonRowSchema carbonRowSchema, byte[][] minValues) {
    CarbonRowSchema[] minSchemas = ((CarbonRowSchema.StructCarbonRowSchema) carbonRowSchema).getChildSchemas();
    DataMapRow minRow = new DataMapRowImpl(minSchemas);
    int minOrdinal = 0;
    // add the min value for each column
    for (int i = 0; i < minMaxLen.length; i++) {
        minRow.setByteArray(minValues[i], minOrdinal++);
    }
    return minRow;
}
Also used : CarbonRowSchema(org.apache.carbondata.core.indexstore.schema.CarbonRowSchema) DataMapRow(org.apache.carbondata.core.indexstore.row.DataMapRow) DataMapRowImpl(org.apache.carbondata.core.indexstore.row.DataMapRowImpl)
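
On the read side the struct pattern is symmetric: the parent row hands back the nested min row via getRow, and each column's value comes back from the same consecutive ordinals it was written at. A minimal sketch, assuming the parent row stored the struct at a hypothetical ordinal minRowOrdinal and that DataMapRow exposes a getByteArray accessor mirroring setByteArray:

private byte[][] readMinValues(DataMapRow parentRow, int minRowOrdinal, int[] minMaxLen) {
    // fetch the nested row written by addMinMax
    DataMapRow minRow = parentRow.getRow(minRowOrdinal);
    byte[][] minValues = new byte[minMaxLen.length][];
    for (int i = 0; i < minMaxLen.length; i++) {
        // one byte array per column, in write order
        minValues[i] = minRow.getByteArray(i);
    }
    return minValues;
}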

Example 2 with DataMapRowImpl

use of org.apache.carbondata.core.indexstore.row.DataMapRowImpl in project carbondata by apache.

the class BlockletDataMap method init.

@Override
public void init(DataMapModel dataMapModel) throws IOException, MemoryException {
    long startTime = System.currentTimeMillis();
    assert (dataMapModel instanceof BlockletDataMapModel);
    BlockletDataMapModel blockletDataMapInfo = (BlockletDataMapModel) dataMapModel;
    DataFileFooterConverter fileFooterConverter = new DataFileFooterConverter();
    List<DataFileFooter> indexInfo = fileFooterConverter.getIndexInfo(blockletDataMapInfo.getFilePath(), blockletDataMapInfo.getFileData());
    Path path = new Path(blockletDataMapInfo.getFilePath());
    byte[] filePath = path.getParent().toString().getBytes(CarbonCommonConstants.DEFAULT_CHARSET);
    byte[] fileName = path.getName().getBytes(CarbonCommonConstants.DEFAULT_CHARSET);
    byte[] segmentId = blockletDataMapInfo.getSegmentId().getBytes(CarbonCommonConstants.DEFAULT_CHARSET);
    DataMapRowImpl summaryRow = null;
    byte[] schemaBinary = null;
    // the below 2 variables will be used for fetching the relative blocklet id. A relative
    // blocklet id is the id assigned to a blocklet within a part file
    String tempFilePath = null;
    int relativeBlockletId = 0;
    for (DataFileFooter fileFooter : indexInfo) {
        if (segmentProperties == null) {
            List<ColumnSchema> columnInTable = fileFooter.getColumnInTable();
            schemaBinary = convertSchemaToBinary(columnInTable);
            columnCardinality = fileFooter.getSegmentInfo().getColumnCardinality();
            segmentProperties = new SegmentProperties(columnInTable, columnCardinality);
            createSchema(segmentProperties);
            createSummarySchema(segmentProperties, schemaBinary, filePath, fileName, segmentId);
        }
        TableBlockInfo blockInfo = fileFooter.getBlockInfo().getTableBlockInfo();
        BlockMetaInfo blockMetaInfo = blockletDataMapInfo.getBlockMetaInfoMap().get(blockInfo.getFilePath());
        // check whether the file exists physically
        if (blockMetaInfo != null) {
            if (fileFooter.getBlockletList() == null) {
                // This is the old store scenario; here blocklet information is not available
                // in the index file, so load only block info
                summaryRow = loadToUnsafeBlock(fileFooter, segmentProperties, blockInfo.getFilePath(), summaryRow, blockMetaInfo);
            } else {
                // blocklet ID will start from 0 again only when part file path is changed
                if (null == tempFilePath || !tempFilePath.equals(blockInfo.getFilePath())) {
                    tempFilePath = blockInfo.getFilePath();
                    relativeBlockletId = 0;
                }
                summaryRow = loadToUnsafe(fileFooter, segmentProperties, blockInfo.getFilePath(), summaryRow, blockMetaInfo, relativeBlockletId);
                // this is done because the relative blocklet id needs to be incremented based
                // on the total number of blocklets
                relativeBlockletId += fileFooter.getBlockletList().size();
            }
        }
    }
    if (unsafeMemoryDMStore != null) {
        unsafeMemoryDMStore.finishWriting();
    }
    if (null != unsafeMemorySummaryDMStore) {
        addTaskSummaryRowToUnsafeMemoryStore(summaryRow, schemaBinary, filePath, fileName, segmentId);
        unsafeMemorySummaryDMStore.finishWriting();
    }
    LOGGER.info("Time taken to load blocklet datamap from file : " + dataMapModel.getFilePath() + " is " + (System.currentTimeMillis() - startTime));
}
Also used : Path(org.apache.hadoop.fs.Path) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) DataFileFooterConverter(org.apache.carbondata.core.util.DataFileFooterConverter) TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) DataFileFooter(org.apache.carbondata.core.metadata.blocklet.DataFileFooter) SegmentProperties(org.apache.carbondata.core.datastore.block.SegmentProperties) DataMapRowImpl(org.apache.carbondata.core.indexstore.row.DataMapRowImpl) BlockMetaInfo(org.apache.carbondata.core.indexstore.BlockMetaInfo)
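
For orientation, a hedged sketch of how a caller might drive init; the BlockletDataMapModel constructor shown here is an assumption inferred from the getters used above (getFilePath, getFileData, getBlockMetaInfoMap, getSegmentId), and the real signature varies across carbondata versions:

// a sketch of the calling side, not the project's actual loading path
BlockletDataMap dataMap = new BlockletDataMap();
// assumed constructor: index file path, raw index file bytes,
// carbon file path -> BlockMetaInfo map, and the segment id
BlockletDataMapModel model =
    new BlockletDataMapModel(indexFilePath, fileData, blockMetaInfoMap, segmentId);
// builds the unsafe index rows plus the task-level summary row
dataMap.init(model);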

Example 3 with DataMapRowImpl

use of org.apache.carbondata.core.indexstore.row.DataMapRowImpl in project carbondata by apache.

the class BlockletDataMap method loadToUnsafeBlock.

/**
 * Load information for the block. This case can happen only for old stores
 * where blocklet information is not available in the index file, so load only
 * block information and read blocklet information in the executor.
 */
private DataMapRowImpl loadToUnsafeBlock(DataFileFooter fileFooter, SegmentProperties segmentProperties, String filePath, DataMapRowImpl summaryRow, BlockMetaInfo blockMetaInfo) {
    int[] minMaxLen = segmentProperties.getColumnsValueSize();
    BlockletIndex blockletIndex = fileFooter.getBlockletIndex();
    CarbonRowSchema[] schema = unsafeMemoryDMStore.getSchema();
    // Add one row to maintain task level min max for segment pruning
    if (summaryRow == null) {
        summaryRow = new DataMapRowImpl(unsafeMemorySummaryDMStore.getSchema());
    }
    DataMapRow row = new DataMapRowImpl(schema);
    int ordinal = 0;
    int taskMinMaxOrdinal = 0;
    // add start key as index key
    row.setByteArray(blockletIndex.getBtreeIndex().getStartKey(), ordinal++);
    BlockletMinMaxIndex minMaxIndex = blockletIndex.getMinMaxIndex();
    byte[][] minValues = updateMinValues(minMaxIndex.getMinValues(), minMaxLen);
    byte[][] maxValues = updateMaxValues(minMaxIndex.getMaxValues(), minMaxLen);
    // update min max values in case of old store
    byte[][] updatedMinValues = CarbonUtil.updateMinMaxValues(fileFooter, maxValues, minValues, true);
    byte[][] updatedMaxValues = CarbonUtil.updateMinMaxValues(fileFooter, maxValues, minValues, false);
    row.setRow(addMinMax(minMaxLen, schema[ordinal], updatedMinValues), ordinal);
    // compute and set task level min values
    addTaskMinMaxValues(summaryRow, minMaxLen, unsafeMemorySummaryDMStore.getSchema()[taskMinMaxOrdinal], updatedMinValues, TASK_MIN_VALUES_INDEX, true);
    ordinal++;
    taskMinMaxOrdinal++;
    row.setRow(addMinMax(minMaxLen, schema[ordinal], updatedMaxValues), ordinal);
    // compute and set task level max values
    addTaskMinMaxValues(summaryRow, minMaxLen, unsafeMemorySummaryDMStore.getSchema()[taskMinMaxOrdinal], updatedMaxValues, TASK_MAX_VALUES_INDEX, false);
    ordinal++;
    row.setInt((int) fileFooter.getNumberOfRows(), ordinal++);
    // add file path
    byte[] filePathBytes = filePath.getBytes(CarbonCommonConstants.DEFAULT_CHARSET_CLASS);
    row.setByteArray(filePathBytes, ordinal++);
    // add pages
    row.setShort((short) 0, ordinal++);
    // add version number
    row.setShort(fileFooter.getVersionId().number(), ordinal++);
    // add schema updated time
    row.setLong(fileFooter.getSchemaUpdatedTimeStamp(), ordinal++);
    // add blocklet info
    row.setByteArray(new byte[0], ordinal++);
    row.setLong(fileFooter.getBlockInfo().getTableBlockInfo().getBlockOffset(), ordinal++);
    try {
        setLocations(blockMetaInfo.getLocationInfo(), row, ordinal);
        ordinal++;
        // for relative blocklet id. Value is -1 because in case of old store the blocklet info
        // will not be present in the index file and in that case we will not know the total
        // number of blocklets
        row.setShort((short) -1, ordinal++);
        // store block size
        row.setLong(blockMetaInfo.getSize(), ordinal);
        unsafeMemoryDMStore.addIndexRowToUnsafe(row);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    return summaryRow;
}
Also used : BlockletIndex(org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex) BlockletMinMaxIndex(org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex) CarbonRowSchema(org.apache.carbondata.core.indexstore.schema.CarbonRowSchema) DataMapRow(org.apache.carbondata.core.indexstore.row.DataMapRow) UnsupportedEncodingException(java.io.UnsupportedEncodingException) MemoryException(org.apache.carbondata.core.memory.MemoryException) IOException(java.io.IOException) DataMapRowImpl(org.apache.carbondata.core.indexstore.row.DataMapRowImpl)
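
Because an old-store row is written with an empty blocklet-info byte array and a relative blocklet id of -1, a reader can branch on those sentinels. A sketch, where BLOCKLET_ID_INDEX and BLOCKLET_INFO_INDEX are hypothetical ordinal constants standing in for whatever positions the schema actually assigns:

// hypothetical ordinals; the real schema defines the actual positions
short relativeBlockletId = indexRow.getShort(BLOCKLET_ID_INDEX);
byte[] blockletInfoBytes = indexRow.getByteArray(BLOCKLET_INFO_INDEX);
if (relativeBlockletId == -1 && blockletInfoBytes.length == 0) {
    // old store: blocklet details are not in the index row and must be
    // read from the block footer on the executor side
}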

Example 4 with DataMapRowImpl

use of org.apache.carbondata.core.indexstore.row.DataMapRowImpl in project carbondata by apache.

the class BlockletDataMap method addTaskMinMaxValues.

/**
 * This method will compute min/max values at task level
 *
 * @param taskMinMaxRow
 * @param minMaxLen
 * @param carbonRowSchema
 * @param minMaxValue
 * @param ordinal
 * @param isMinValueComparison
 */
private void addTaskMinMaxValues(DataMapRow taskMinMaxRow, int[] minMaxLen, CarbonRowSchema carbonRowSchema, byte[][] minMaxValue, int ordinal, boolean isMinValueComparison) {
    DataMapRow row = taskMinMaxRow.getRow(ordinal);
    byte[][] updatedMinMaxValues = minMaxValue;
    if (null == row) {
        CarbonRowSchema[] minSchemas = ((CarbonRowSchema.StructCarbonRowSchema) carbonRowSchema).getChildSchemas();
        row = new DataMapRowImpl(minSchemas);
    } else {
        byte[][] existingMinMaxValues = getMinMaxValue(taskMinMaxRow, ordinal);
        // Compare and update min max values
        for (int i = 0; i < minMaxLen.length; i++) {
            int compare = ByteUtil.UnsafeComparer.INSTANCE.compareTo(existingMinMaxValues[i], minMaxValue[i]);
            if (isMinValueComparison) {
                if (compare < 0) {
                    updatedMinMaxValues[i] = existingMinMaxValues[i];
                }
            } else if (compare > 0) {
                updatedMinMaxValues[i] = existingMinMaxValues[i];
            }
        }
    }
    int minMaxOrdinal = 0;
    // add the min/max value for each column
    for (int i = 0; i < minMaxLen.length; i++) {
        row.setByteArray(updatedMinMaxValues[i], minMaxOrdinal++);
    }
    taskMinMaxRow.setRow(row, ordinal);
}
Also used : CarbonRowSchema(org.apache.carbondata.core.indexstore.schema.CarbonRowSchema) DataMapRow(org.apache.carbondata.core.indexstore.row.DataMapRow) DataMapRowImpl(org.apache.carbondata.core.indexstore.row.DataMapRowImpl)
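
The update rule is easier to see with a concrete single-column case: UnsafeComparer orders the byte arrays lexicographically, and the existing value survives only when it is already the better bound. A minimal illustration:

byte[] existing = new byte[] { 2 };
byte[] incoming = new byte[] { 5 };
int compare = ByteUtil.UnsafeComparer.INSTANCE.compareTo(existing, incoming);
// compare < 0 here, since existing sorts before incoming
// min case: compare < 0, so the existing (smaller) value is kept
// max case: compare > 0 is false, so the incoming (larger) value wins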

Example 5 with DataMapRowImpl

use of org.apache.carbondata.core.indexstore.row.DataMapRowImpl in project carbondata by apache.

the class BlockletDataMap method loadToUnsafe.

private DataMapRowImpl loadToUnsafe(DataFileFooter fileFooter, SegmentProperties segmentProperties, String filePath, DataMapRowImpl summaryRow, BlockMetaInfo blockMetaInfo, int relativeBlockletId) {
    int[] minMaxLen = segmentProperties.getColumnsValueSize();
    List<BlockletInfo> blockletList = fileFooter.getBlockletList();
    CarbonRowSchema[] schema = unsafeMemoryDMStore.getSchema();
    // Add one row to maintain task level min max for segment pruning
    if (!blockletList.isEmpty() && summaryRow == null) {
        summaryRow = new DataMapRowImpl(unsafeMemorySummaryDMStore.getSchema());
    }
    for (int index = 0; index < blockletList.size(); index++) {
        DataMapRow row = new DataMapRowImpl(schema);
        int ordinal = 0;
        int taskMinMaxOrdinal = 0;
        BlockletInfo blockletInfo = blockletList.get(index);
        // add start key as index key
        row.setByteArray(blockletInfo.getBlockletIndex().getBtreeIndex().getStartKey(), ordinal++);
        BlockletMinMaxIndex minMaxIndex = blockletInfo.getBlockletIndex().getMinMaxIndex();
        byte[][] minValues = updateMinValues(minMaxIndex.getMinValues(), minMaxLen);
        row.setRow(addMinMax(minMaxLen, schema[ordinal], minValues), ordinal);
        // compute and set task level min values
        addTaskMinMaxValues(summaryRow, minMaxLen, unsafeMemorySummaryDMStore.getSchema()[taskMinMaxOrdinal], minValues, TASK_MIN_VALUES_INDEX, true);
        ordinal++;
        taskMinMaxOrdinal++;
        byte[][] maxValues = updateMaxValues(minMaxIndex.getMaxValues(), minMaxLen);
        row.setRow(addMinMax(minMaxLen, schema[ordinal], maxValues), ordinal);
        // compute and set task level max values
        addTaskMinMaxValues(summaryRow, minMaxLen, unsafeMemorySummaryDMStore.getSchema()[taskMinMaxOrdinal], maxValues, TASK_MAX_VALUES_INDEX, false);
        ordinal++;
        row.setInt(blockletInfo.getNumberOfRows(), ordinal++);
        // add file path
        byte[] filePathBytes = filePath.getBytes(CarbonCommonConstants.DEFAULT_CHARSET_CLASS);
        row.setByteArray(filePathBytes, ordinal++);
        // add pages
        row.setShort((short) blockletInfo.getNumberOfPages(), ordinal++);
        // add version number
        row.setShort(fileFooter.getVersionId().number(), ordinal++);
        // add schema updated time
        row.setLong(fileFooter.getSchemaUpdatedTimeStamp(), ordinal++);
        // add blocklet info
        byte[] serializedData;
        try {
            ByteArrayOutputStream stream = new ByteArrayOutputStream();
            DataOutput dataOutput = new DataOutputStream(stream);
            blockletInfo.write(dataOutput);
            serializedData = stream.toByteArray();
            row.setByteArray(serializedData, ordinal++);
            // Add block footer offset, it is used if we need to read footer of block
            row.setLong(fileFooter.getBlockInfo().getTableBlockInfo().getBlockOffset(), ordinal++);
            setLocations(blockMetaInfo.getLocationInfo(), row, ordinal);
            ordinal++;
            // for relative blocklet id, i.e. the blocklet id that belongs to a particular part file
            row.setShort((short) relativeBlockletId++, ordinal++);
            // Store block size
            row.setLong(blockMetaInfo.getSize(), ordinal);
            unsafeMemoryDMStore.addIndexRowToUnsafe(row);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
    return summaryRow;
}
Also used : DataOutput(java.io.DataOutput) DataOutputStream(java.io.DataOutputStream) BlockletInfo(org.apache.carbondata.core.metadata.blocklet.BlockletInfo) ByteArrayOutputStream(java.io.ByteArrayOutputStream) CarbonRowSchema(org.apache.carbondata.core.indexstore.schema.CarbonRowSchema) DataMapRow(org.apache.carbondata.core.indexstore.row.DataMapRow) UnsupportedEncodingException(java.io.UnsupportedEncodingException) MemoryException(org.apache.carbondata.core.memory.MemoryException) IOException(java.io.IOException) BlockletMinMaxIndex(org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex) DataMapRowImpl(org.apache.carbondata.core.indexstore.row.DataMapRowImpl)
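
The serialized blocklet info can be recovered with the mirror-image read path. A hedged sketch, assuming BlockletInfo follows the usual Writable convention of a readFields(DataInput) counterpart to the write(DataOutput) call above, and reusing the hypothetical BLOCKLET_INFO_INDEX ordinal:

// needs java.io.ByteArrayInputStream, java.io.DataInput, java.io.DataInputStream
byte[] serializedData = indexRow.getByteArray(BLOCKLET_INFO_INDEX);
BlockletInfo blockletInfo = new BlockletInfo();
DataInput dataInput = new DataInputStream(new ByteArrayInputStream(serializedData));
// assumed counterpart of write(DataOutput); reads fields in the same order
blockletInfo.readFields(dataInput);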

Aggregations

DataMapRowImpl (org.apache.carbondata.core.indexstore.row.DataMapRowImpl) 6
DataMapRow (org.apache.carbondata.core.indexstore.row.DataMapRow) 4
CarbonRowSchema (org.apache.carbondata.core.indexstore.schema.CarbonRowSchema) 4
IOException (java.io.IOException) 2
UnsupportedEncodingException (java.io.UnsupportedEncodingException) 2
MemoryException (org.apache.carbondata.core.memory.MemoryException) 2
BlockletMinMaxIndex (org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex) 2
ByteArrayOutputStream (java.io.ByteArrayOutputStream) 1
DataOutput (java.io.DataOutput) 1
DataOutputStream (java.io.DataOutputStream) 1
ByteBuffer (java.nio.ByteBuffer) 1
SegmentProperties (org.apache.carbondata.core.datastore.block.SegmentProperties) 1
TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo) 1
BlockMetaInfo (org.apache.carbondata.core.indexstore.BlockMetaInfo) 1
BlockletInfo (org.apache.carbondata.core.metadata.blocklet.BlockletInfo) 1
DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter) 1
BlockletIndex (org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex) 1
ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) 1
DataFileFooterConverter (org.apache.carbondata.core.util.DataFileFooterConverter) 1
CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath) 1