Use of org.apache.carbondata.core.indexstore.schema.CarbonRowSchema in project carbondata by apache.
Class BlockletDataMap, method addMinMax.
private DataMapRow addMinMax(int[] minMaxLen, CarbonRowSchema carbonRowSchema,
    byte[][] minValues) {
  CarbonRowSchema[] minSchemas =
      ((CarbonRowSchema.StructCarbonRowSchema) carbonRowSchema).getChildSchemas();
  DataMapRow minRow = new DataMapRowImpl(minSchemas);
  int minOrdinal = 0;
  // min value adding
  for (int i = 0; i < minMaxLen.length; i++) {
    minRow.setByteArray(minValues[i], minOrdinal++);
  }
  return minRow;
}
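For context, a minimal sketch of how addMinMax might be invoked, assuming a struct schema built the same way getMinMaxSchema (below) builds one; the column value sizes and the contents of minValues are made-up illustrative data, not taken from the CarbonData sources:

// Illustrative only: two columns, the first with a fixed 8-byte value, the second variable length.
int[] minMaxLen = new int[] { 8, -1 };
CarbonRowSchema[] children = new CarbonRowSchema[minMaxLen.length];
for (int i = 0; i < minMaxLen.length; i++) {
  children[i] = minMaxLen[i] <= 0
      ? new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY)
      : new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.BYTE_ARRAY, minMaxLen[i]);
}
CarbonRowSchema minSchema =
    new CarbonRowSchema.StructCarbonRowSchema(DataTypes.createDefaultStructType(), children);
// minValues would normally come from a BlockletMinMaxIndex; here it is dummy data
byte[][] minValues = new byte[][] { new byte[8], new byte[] { 1, 2, 3 } };
DataMapRow minRow = addMinMax(minMaxLen, minSchema, minValues);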
Use of org.apache.carbondata.core.indexstore.schema.CarbonRowSchema in project carbondata by apache.
Class BlockletDataMap, method getMinMaxSchema.
private void getMinMaxSchema(SegmentProperties segmentProperties,
    List<CarbonRowSchema> minMaxSchemas) {
  // Index key
  int[] minMaxLen = segmentProperties.getColumnsValueSize();
  // do it 2 times, one for min and one for max.
  for (int k = 0; k < 2; k++) {
    CarbonRowSchema[] mapSchemas = new CarbonRowSchema[minMaxLen.length];
    for (int i = 0; i < minMaxLen.length; i++) {
      if (minMaxLen[i] <= 0) {
        mapSchemas[i] = new CarbonRowSchema.VariableCarbonRowSchema(DataTypes.BYTE_ARRAY);
      } else {
        mapSchemas[i] = new CarbonRowSchema.FixedCarbonRowSchema(DataTypes.BYTE_ARRAY, minMaxLen[i]);
      }
    }
    CarbonRowSchema mapSchema =
        new CarbonRowSchema.StructCarbonRowSchema(DataTypes.createDefaultStructType(), mapSchemas);
    minMaxSchemas.add(mapSchema);
  }
}
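A short usage sketch, assuming an initialized SegmentProperties instance named segmentProperties: the method appends two StructCarbonRowSchema entries to the list, index 0 for min values and index 1 for max values, each holding one child schema per column.

List<CarbonRowSchema> minMaxSchemas = new ArrayList<>();
getMinMaxSchema(segmentProperties, minMaxSchemas);
// one struct schema for min values, one for max values
assert minMaxSchemas.size() == 2;
CarbonRowSchema[] minChildren =
    ((CarbonRowSchema.StructCarbonRowSchema) minMaxSchemas.get(0)).getChildSchemas();
// one child schema per column value size reported by the segment
assert minChildren.length == segmentProperties.getColumnsValueSize().length;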
Use of org.apache.carbondata.core.indexstore.schema.CarbonRowSchema in project carbondata by apache.
Class BlockletDataMap, method loadToUnsafeBlock.
/**
 * Load information for the block. This case can happen only for old stores
 * where blocklet information is not available in the index file. So load only block
 * information and read blocklet information in the executor.
 */
private DataMapRowImpl loadToUnsafeBlock(DataFileFooter fileFooter,
    SegmentProperties segmentProperties, String filePath, DataMapRowImpl summaryRow,
    BlockMetaInfo blockMetaInfo) {
  int[] minMaxLen = segmentProperties.getColumnsValueSize();
  BlockletIndex blockletIndex = fileFooter.getBlockletIndex();
  CarbonRowSchema[] schema = unsafeMemoryDMStore.getSchema();
  // Add one row to maintain task level min max for segment pruning
  if (summaryRow == null) {
    summaryRow = new DataMapRowImpl(unsafeMemorySummaryDMStore.getSchema());
  }
  DataMapRow row = new DataMapRowImpl(schema);
  int ordinal = 0;
  int taskMinMaxOrdinal = 0;
  // add start key as index key
  row.setByteArray(blockletIndex.getBtreeIndex().getStartKey(), ordinal++);
  BlockletMinMaxIndex minMaxIndex = blockletIndex.getMinMaxIndex();
  byte[][] minValues = updateMinValues(minMaxIndex.getMinValues(), minMaxLen);
  byte[][] maxValues = updateMaxValues(minMaxIndex.getMaxValues(), minMaxLen);
  // update min max values in case of old store
  byte[][] updatedMinValues =
      CarbonUtil.updateMinMaxValues(fileFooter, maxValues, minValues, true);
  byte[][] updatedMaxValues =
      CarbonUtil.updateMinMaxValues(fileFooter, maxValues, minValues, false);
  row.setRow(addMinMax(minMaxLen, schema[ordinal], updatedMinValues), ordinal);
  // compute and set task level min values
  addTaskMinMaxValues(summaryRow, minMaxLen,
      unsafeMemorySummaryDMStore.getSchema()[taskMinMaxOrdinal], updatedMinValues,
      TASK_MIN_VALUES_INDEX, true);
  ordinal++;
  taskMinMaxOrdinal++;
  row.setRow(addMinMax(minMaxLen, schema[ordinal], updatedMaxValues), ordinal);
  // compute and set task level max values
  addTaskMinMaxValues(summaryRow, minMaxLen,
      unsafeMemorySummaryDMStore.getSchema()[taskMinMaxOrdinal], updatedMaxValues,
      TASK_MAX_VALUES_INDEX, false);
  ordinal++;
  row.setInt((int) fileFooter.getNumberOfRows(), ordinal++);
  // add file path
  byte[] filePathBytes = filePath.getBytes(CarbonCommonConstants.DEFAULT_CHARSET_CLASS);
  row.setByteArray(filePathBytes, ordinal++);
  // add pages
  row.setShort((short) 0, ordinal++);
  // add version number
  row.setShort(fileFooter.getVersionId().number(), ordinal++);
  // add schema updated time
  row.setLong(fileFooter.getSchemaUpdatedTimeStamp(), ordinal++);
  // add blocklet info
  row.setByteArray(new byte[0], ordinal++);
  row.setLong(fileFooter.getBlockInfo().getTableBlockInfo().getBlockOffset(), ordinal++);
  try {
    setLocations(blockMetaInfo.getLocationInfo(), row, ordinal);
    ordinal++;
    // for relative blocklet id. Value is -1 because in case of old store blocklet info will
    // not be present in the index file and in that case we will not know the total number of
    // blocklets
    row.setShort((short) -1, ordinal++);
    // store block size
    row.setLong(blockMetaInfo.getSize(), ordinal);
    unsafeMemoryDMStore.addIndexRowToUnsafe(row);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
  return summaryRow;
}
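An illustrative calling pattern, not taken from the CarbonData sources: for an old store, one block-level row is loaded per file footer, threading the same summary row through every call. The names footers, segmentProperties and blockMetaInfoMap are assumptions about the surrounding state.

// Hypothetical driver loop for old-store segments
DataMapRowImpl summaryRow = null;
for (DataFileFooter footer : footers) {
  String filePath = footer.getBlockInfo().getTableBlockInfo().getFilePath();
  summaryRow = loadToUnsafeBlock(footer, segmentProperties, filePath, summaryRow,
      blockMetaInfoMap.get(filePath));
}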
Use of org.apache.carbondata.core.indexstore.schema.CarbonRowSchema in project carbondata by apache.
Class BlockIndex, method loadBlockMetaInfo.
/**
 * Method to load block metadata information
 *
 * @param taskSummarySchema schema of the task level summary row
 * @param segmentProperties properties of the segment being loaded
 * @param blockletIndexModel model holding the block meta info map
 * @param indexInfo list of file footers read from the index file
 * @return the populated task level summary row
 */
private IndexRowImpl loadBlockMetaInfo(CarbonRowSchema[] taskSummarySchema,
    SegmentProperties segmentProperties, BlockletIndexModel blockletIndexModel,
    List<DataFileFooter> indexInfo) {
  String tempFilePath = null;
  DataFileFooter previousDataFileFooter = null;
  int footerCounter = 0;
  byte[][] blockMinValues = null;
  byte[][] blockMaxValues = null;
  IndexRowImpl summaryRow = null;
  List<Short> blockletCountInEachBlock = new ArrayList<>(indexInfo.size());
  short totalBlockletsInOneBlock = 0;
  boolean isLastFileFooterEntryNeedToBeAdded = false;
  CarbonRowSchema[] schema = getFileFooterEntrySchema();
  // flag for each block entry
  boolean[] minMaxFlag = new boolean[segmentProperties.getNumberOfColumns()];
  Arrays.fill(minMaxFlag, true);
  // min max flag for task summary
  boolean[] taskSummaryMinMaxFlag = new boolean[segmentProperties.getNumberOfColumns()];
  Arrays.fill(taskSummaryMinMaxFlag, true);
  long totalRowCount = 0;
  for (DataFileFooter fileFooter : indexInfo) {
    TableBlockInfo blockInfo = fileFooter.getBlockInfo();
    BlockMetaInfo blockMetaInfo =
        blockletIndexModel.getBlockMetaInfoMap().get(blockInfo.getFilePath());
    footerCounter++;
    if (blockMetaInfo != null) {
      // a block entry is added every time a unique block path is encountered
      if (null == tempFilePath) {
        tempFilePath = blockInfo.getFilePath();
        // 1st time assign the min and max values from the current file footer
        blockMinValues = fileFooter.getBlockletIndex().getMinMaxIndex().getMinValues();
        blockMaxValues = fileFooter.getBlockletIndex().getMinMaxIndex().getMaxValues();
        updateMinMaxFlag(fileFooter, minMaxFlag);
        updateMinMaxFlag(fileFooter, taskSummaryMinMaxFlag);
        previousDataFileFooter = fileFooter;
        totalBlockletsInOneBlock++;
      } else if (blockInfo.getFilePath().equals(tempFilePath)) {
        // while iterating over the blocklets that belong to one block we need to compute the
        // min and max at block level. So compare min and max values and update if required
        BlockletMinMaxIndex currentFooterMinMaxIndex =
            fileFooter.getBlockletIndex().getMinMaxIndex();
        blockMinValues = compareAndUpdateMinMax(currentFooterMinMaxIndex.getMinValues(),
            blockMinValues, true, fileFooter.getColumnInTable());
        blockMaxValues = compareAndUpdateMinMax(currentFooterMinMaxIndex.getMaxValues(),
            blockMaxValues, false, fileFooter.getColumnInTable());
        updateMinMaxFlag(fileFooter, minMaxFlag);
        updateMinMaxFlag(fileFooter, taskSummaryMinMaxFlag);
        totalBlockletsInOneBlock++;
      }
      // add a row for the previous block once a new block path is encountered in the task.
      // OR condition is to handle the loading of the last file footer
      if (!blockInfo.getFilePath().equals(tempFilePath) || footerCounter == indexInfo.size()) {
        TableBlockInfo previousBlockInfo = previousDataFileFooter.getBlockInfo();
        summaryRow = loadToUnsafeBlock(schema, taskSummarySchema, previousDataFileFooter,
            segmentProperties, getMinMaxCacheColumns(), previousBlockInfo.getFilePath(),
            summaryRow,
            blockletIndexModel.getBlockMetaInfoMap().get(previousBlockInfo.getFilePath()),
            blockMinValues, blockMaxValues, minMaxFlag);
        totalRowCount += previousDataFileFooter.getNumberOfRows();
        minMaxFlag = new boolean[segmentProperties.getNumberOfColumns()];
        Arrays.fill(minMaxFlag, true);
        // flag to check whether the last file footer entry is different from the previous
        // entry. If yes then it needs to be added at the end
        isLastFileFooterEntryNeedToBeAdded =
            (footerCounter == indexInfo.size()) && (!blockInfo.getFilePath().equals(tempFilePath));
        // assign local variables values using the current file footer
        tempFilePath = blockInfo.getFilePath();
        blockMinValues = fileFooter.getBlockletIndex().getMinMaxIndex().getMinValues();
        blockMaxValues = fileFooter.getBlockletIndex().getMinMaxIndex().getMaxValues();
        updateMinMaxFlag(fileFooter, minMaxFlag);
        updateMinMaxFlag(fileFooter, taskSummaryMinMaxFlag);
        previousDataFileFooter = fileFooter;
        blockletCountInEachBlock.add(totalBlockletsInOneBlock);
        // for the next block the count will start from 1 because a row is created whenever a
        // new file path comes. Here a new file path has already come, so the count starts from 1
        totalBlockletsInOneBlock = 1;
      }
    }
  }
  // add the last file footer entry
  if (isLastFileFooterEntryNeedToBeAdded) {
    summaryRow = loadToUnsafeBlock(schema, taskSummarySchema, previousDataFileFooter,
        segmentProperties, getMinMaxCacheColumns(),
        previousDataFileFooter.getBlockInfo().getFilePath(), summaryRow,
        blockletIndexModel.getBlockMetaInfoMap()
            .get(previousDataFileFooter.getBlockInfo().getFilePath()),
        blockMinValues, blockMaxValues, minMaxFlag);
    totalRowCount += previousDataFileFooter.getNumberOfRows();
    blockletCountInEachBlock.add(totalBlockletsInOneBlock);
  }
  byte[] blockletCount = convertRowCountFromShortToByteArray(blockletCountInEachBlock);
  // set the total row count
  summaryRow.setLong(totalRowCount, TASK_ROW_COUNT);
  // blocklet count index is the last index
  summaryRow.setByteArray(blockletCount, taskSummarySchema.length - 1);
  setMinMaxFlagForTaskSummary(summaryRow, taskSummarySchema, segmentProperties,
      taskSummaryMinMaxFlag);
  return summaryRow;
}
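To make the bookkeeping easier to follow, here is a hypothetical trace, assuming three footers where the first two belong to one carbondata file and the third to another (the file names partA and partB are made up):

// footer 1 (partA): tempFilePath = partA, block min/max seeded, totalBlockletsInOneBlock = 1
// footer 2 (partA): same path, block min/max merged, totalBlockletsInOneBlock = 2
// footer 3 (partB): path changed -> loadToUnsafeBlock adds the row for partA,
//                   blockletCountInEachBlock = [2], isLastFileFooterEntryNeedToBeAdded = true,
//                   state re-seeded from footer 3, totalBlockletsInOneBlock = 1
// after the loop : last-entry branch adds the row for partB, blockletCountInEachBlock = [2, 1]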
Use of org.apache.carbondata.core.indexstore.schema.CarbonRowSchema in project carbondata by apache.
Class BlockletIndex, method loadBlockletMetaInfo.
/**
 * Method to load blocklet metadata information
 *
 * @param taskSummarySchema schema of the task level summary row
 * @param segmentProperties properties of the segment being loaded
 * @param blockletIndexModel model holding the block meta info map
 * @param indexInfo list of file footers read from the index file
 * @return the populated task level summary row
 */
private IndexRowImpl loadBlockletMetaInfo(CarbonRowSchema[] taskSummarySchema,
    SegmentProperties segmentProperties, BlockletIndexModel blockletIndexModel,
    List<DataFileFooter> indexInfo) {
  String tempFilePath = null;
  IndexRowImpl summaryRow = null;
  CarbonRowSchema[] schema = getFileFooterEntrySchema();
  boolean[] summaryRowMinMaxFlag = new boolean[segmentProperties.getNumberOfColumns()];
  Arrays.fill(summaryRowMinMaxFlag, true);
  // Relative blocklet ID is the id assigned to a blocklet within a part file
  int relativeBlockletId = 0;
  for (DataFileFooter fileFooter : indexInfo) {
    // update the min max flag for the summary row
    updateMinMaxFlag(fileFooter, summaryRowMinMaxFlag);
    TableBlockInfo blockInfo = fileFooter.getBlockInfo();
    BlockMetaInfo blockMetaInfo =
        blockletIndexModel.getBlockMetaInfoMap().get(blockInfo.getFilePath());
    // blockMetaInfo is null when the file does not exist physically; skip such entries
    if (blockMetaInfo != null) {
      // blocklet ID will start from 0 again only when the part file path changes
      if (null == tempFilePath || !tempFilePath.equals(blockInfo.getFilePath())) {
        tempFilePath = blockInfo.getFilePath();
        relativeBlockletId = 0;
        blockNum++;
      }
      summaryRow = loadToUnsafe(schema, taskSummarySchema, fileFooter, segmentProperties,
          getMinMaxCacheColumns(), blockInfo.getFilePath(), summaryRow, blockMetaInfo,
          relativeBlockletId);
      // the relative blocklet id needs to be incremented by the total number of blocklets
      // present in the current file footer
      relativeBlockletId += fileFooter.getBlockletList().size();
    }
  }
  summaryRow.setLong(0L, TASK_ROW_COUNT);
  setMinMaxFlagForTaskSummary(summaryRow, taskSummarySchema, segmentProperties,
      summaryRowMinMaxFlag);
  return summaryRow;
}
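A similar hypothetical trace for the relative blocklet ID handling, assuming footers over two part files (the part file names and blocklet counts below are made up):

// footer 1 (part0, 3 blocklets): new path  -> relativeBlockletId reset to 0, blockNum = 1,
//                                loadToUnsafe(..., relativeBlockletId = 0), then id += 3
// footer 2 (part0, 2 blocklets): same path -> loadToUnsafe(..., relativeBlockletId = 3), then id += 2
// footer 3 (part1, 4 blocklets): new path  -> relativeBlockletId reset to 0, blockNum = 2,
//                                loadToUnsafe(..., relativeBlockletId = 0), then id += 4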