Use of org.apache.carbondata.core.indexstore.row.IndexRow in the Apache CarbonData project.
Class BlockletIndex, method loadToUnsafe.
/**
 * Loads blocklet-level metadata from the file footer into the unsafe memory
 * store, creating one index row per blocklet. While iterating, it also
 * accumulates task-level min/max values into {@code summaryRow}, which is
 * later used for segment-level pruning.
 *
 * @param schema schema of a blocklet-level index row
 * @param taskSummarySchema schema of the task-level summary row
 * @param fileFooter footer of the carbondata file being indexed
 * @param segmentProperties segment properties used to resolve cached columns
 * @param minMaxCacheColumns columns whose min/max values are cached
 * @param filePath path of the carbondata file
 * @param summaryRow task-level summary row; created lazily on first use
 * @param blockMetaInfo block size and HDFS location metadata
 * @param relativeBlockletId starting blocklet id relative to this carbondata file
 * @return the (possibly newly created) task-level summary row
 */
private IndexRowImpl loadToUnsafe(CarbonRowSchema[] schema, CarbonRowSchema[] taskSummarySchema,
    DataFileFooter fileFooter, SegmentProperties segmentProperties,
    List<CarbonColumn> minMaxCacheColumns, String filePath, IndexRowImpl summaryRow,
    BlockMetaInfo blockMetaInfo, int relativeBlockletId) {
  List<BlockletInfo> blockletList = fileFooter.getBlockletList();
  // Add one row to maintain task level min max for segment pruning
  if (!blockletList.isEmpty() && summaryRow == null) {
    summaryRow = new IndexRowImpl(taskSummarySchema);
  }
  for (int index = 0; index < blockletList.size(); index++) {
    IndexRow row = new IndexRowImpl(schema);
    // ordinal tracks the current column in the blocklet row; the ordinal
    // sequence below must match the order of entries in `schema`
    int ordinal = 0;
    int taskMinMaxOrdinal = 1;
    BlockletInfo blockletInfo = blockletList.get(index);
    blockletInfo.setSorted(fileFooter.isSorted());
    BlockletMinMaxIndex minMaxIndex = blockletInfo.getBlockletIndex().getMinMaxIndex();
    // get min max values for columns to be cached
    byte[][] minValuesForColumnsToBeCached = BlockletIndexUtil.getMinMaxForColumnsToBeCached(
        segmentProperties, minMaxCacheColumns, minMaxIndex.getMinValues());
    byte[][] maxValuesForColumnsToBeCached = BlockletIndexUtil.getMinMaxForColumnsToBeCached(
        segmentProperties, minMaxCacheColumns, minMaxIndex.getMaxValues());
    boolean[] minMaxFlagValuesForColumnsToBeCached =
        BlockletIndexUtil.getMinMaxFlagValuesForColumnsToBeCached(segmentProperties,
            minMaxCacheColumns, fileFooter.getBlockletIndex().getMinMaxIndex().getIsMinMaxSet());
    row.setRow(addMinMax(schema[ordinal], minValuesForColumnsToBeCached), ordinal);
    // compute and set task level min values
    addTaskMinMaxValues(summaryRow, taskSummarySchema, taskMinMaxOrdinal,
        minValuesForColumnsToBeCached, TASK_MIN_VALUES_INDEX, true, fileFooter.getColumnInTable());
    ordinal++;
    taskMinMaxOrdinal++;
    row.setRow(addMinMax(schema[ordinal], maxValuesForColumnsToBeCached), ordinal);
    // compute and set task level max values
    addTaskMinMaxValues(summaryRow, taskSummarySchema, taskMinMaxOrdinal,
        maxValuesForColumnsToBeCached, TASK_MAX_VALUES_INDEX, false, fileFooter.getColumnInTable());
    ordinal++;
    row.setInt(blockletInfo.getNumberOfRows(), ordinal++);
    // add file name
    byte[] filePathBytes = CarbonTablePath.getCarbonDataFileName(filePath)
        .getBytes(CarbonCommonConstants.DEFAULT_CHARSET_CLASS);
    row.setByteArray(filePathBytes, ordinal++);
    // add version number
    row.setShort(fileFooter.getVersionId().number(), ordinal++);
    // add schema updated time
    row.setLong(fileFooter.getSchemaUpdatedTimeStamp(), ordinal++);
    byte[] serializedData;
    try {
      // Add block footer offset, it is used if we need to read footer of block
      row.setLong(fileFooter.getBlockInfo().getBlockOffset(), ordinal++);
      setLocations(blockMetaInfo.getLocationInfo(), row, ordinal++);
      // Store block size
      row.setLong(blockMetaInfo.getSize(), ordinal++);
      // add min max flag for all the dimension columns
      addMinMaxFlagValues(row, schema[ordinal], minMaxFlagValuesForColumnsToBeCached, ordinal);
      ordinal++;
      // add blocklet info: serialize the whole BlockletInfo so the executor can
      // deserialize it without re-reading the footer
      ByteArrayOutputStream stream = new ByteArrayOutputStream();
      DataOutput dataOutput = new DataOutputStream(stream);
      blockletInfo.write(dataOutput);
      serializedData = stream.toByteArray();
      row.setByteArray(serializedData, ordinal++);
      // add pages
      row.setShort((short) blockletInfo.getNumberOfPages(), ordinal++);
      // for relative blocklet id i.e blocklet id that belongs to a particular carbondata file
      row.setShort((short) relativeBlockletId++, ordinal);
      memoryDMStore.addIndexRow(schema, row);
    } catch (Exception e) {
      // include the file path so a load failure can be traced to a specific
      // block (consistent with loadToUnsafeBlock's error handling)
      String message = "Load to unsafe failed for block: " + filePath;
      throw new RuntimeException(message, e);
    }
  }
  return summaryRow;
}
Use of org.apache.carbondata.core.indexstore.row.IndexRow in the Apache CarbonData project.
Class BlockIndex, method prune.
/**
 * Prunes the blocklets held in this index against the given filter. When
 * {@code filterExp} is null, every stored row is returned as a blocklet;
 * otherwise each row's cached min/max values are evaluated through the filter
 * executor and only matching entries are kept. When explain collection is
 * enabled, total/hit blocklet counters are recorded for the pruning report.
 *
 * @param filterExp resolved filter tree, or null meaning "select all"
 * @param filterExecutor executor evaluating min/max ranges; rebuilt when the
 *        passed segment properties fail validation against the cached ones
 * @param segmentProperties properties of the segment being queried
 * @return blocklets that survive pruning (empty list when the store has no rows)
 */
private List<Blocklet> prune(FilterResolverIntf filterExp, FilterExecutor filterExecutor, SegmentProperties segmentProperties) {
// nothing cached: nothing to prune
if (memoryDMStore.getRowCount() == 0) {
return new ArrayList<>();
}
List<Blocklet> blocklets = new ArrayList<>();
CarbonRowSchema[] schema = getFileFooterEntrySchema();
String filePath = getFilePath();
int numEntries = memoryDMStore.getRowCount();
int totalBlocklets = 0;
// total blocklet count is only needed for the explain output, so compute lazily
if (ExplainCollector.enabled()) {
totalBlocklets = getTotalBlocklets();
}
int hitBlocklets = 0;
if (filterExp == null) {
// no filter: every stored index row becomes a result blocklet
for (int i = 0; i < numEntries; i++) {
IndexRow indexRow = memoryDMStore.getIndexRow(schema, i);
blocklets.add(createBlocklet(indexRow, getFileNameWithFilePath(indexRow, filePath), getBlockletId(indexRow), false));
}
hitBlocklets = totalBlocklets;
} else {
// Remove B-tree jump logic as start and end key prepared is not
// correct for old store scenarios
int entryIndex = 0;
// flag to be used for deciding whether use min/max in executor pruning for BlockletIndex
boolean useMinMaxForPruning = useMinMaxForExecutorPruning(filterExp);
// rebuild the executor tree when the caller's segment properties do not
// match this index's cached properties
if (!validateSegmentProperties(segmentProperties)) {
filterExecutor = FilterUtil.getFilterExecutorTree(filterExp, getSegmentProperties(), null, getMinMaxCacheColumns(), false);
}
// min and max for executor pruning
while (entryIndex < numEntries) {
IndexRow row = memoryDMStore.getIndexRow(schema, entryIndex);
boolean[] minMaxFlag = getMinMaxFlag(row, BLOCK_MIN_MAX_FLAG);
String fileName = getFileNameWithFilePath(row, filePath);
short blockletId = getBlockletId(row);
// keep the entry only when its [min, max] range can satisfy the filter
boolean isValid = addBlockBasedOnMinMaxValue(filterExecutor, getMinMaxValue(row, MAX_VALUES_INDEX), getMinMaxValue(row, MIN_VALUES_INDEX), minMaxFlag, fileName, blockletId);
if (isValid) {
blocklets.add(createBlocklet(row, fileName, blockletId, useMinMaxForPruning));
if (ExplainCollector.enabled()) {
hitBlocklets += getBlockletNumOfEntry(entryIndex);
}
}
entryIndex++;
}
}
if (ExplainCollector.enabled()) {
ExplainCollector.setShowPruningInfo(true);
ExplainCollector.addTotalBlocklets(totalBlocklets);
ExplainCollector.addTotalBlocks(getTotalBlocks());
ExplainCollector.addDefaultIndexPruningHit(hitBlocklets);
}
return blocklets;
}
Use of org.apache.carbondata.core.indexstore.row.IndexRow in the Apache CarbonData project.
Class BlockIndex, method loadToUnsafeBlock.
/**
 * Load information for the block. This case can happen only for old stores
 * where blocklet information is not available in the index file, so only
 * block-level information is loaded here and blocklet information is read
 * later in the executor. Also accumulates task-level min/max values into
 * {@code summaryRow} for segment pruning.
 *
 * @param schema schema of a block-level index row
 * @param taskSummarySchema schema of the task-level summary row
 * @param fileFooter footer of the carbondata file being indexed
 * @param segmentProperties segment properties used to resolve cached columns
 * @param minMaxCacheColumns columns whose min/max values are cached
 * @param filePath path of the carbondata file
 * @param summaryRow task-level summary row; created lazily on first use
 * @param blockMetaInfo block size and location metadata
 * @param minValues block-level min values per column
 * @param maxValues block-level max values per column
 * @param minMaxFlag per-column flags indicating whether min/max was written
 * @return the (possibly newly created) task-level summary row
 */
protected IndexRowImpl loadToUnsafeBlock(CarbonRowSchema[] schema, CarbonRowSchema[] taskSummarySchema, DataFileFooter fileFooter, SegmentProperties segmentProperties, List<CarbonColumn> minMaxCacheColumns, String filePath, IndexRowImpl summaryRow, BlockMetaInfo blockMetaInfo, byte[][] minValues, byte[][] maxValues, boolean[] minMaxFlag) {
// Add one row to maintain task level min max for segment pruning
if (summaryRow == null) {
summaryRow = new IndexRowImpl(taskSummarySchema);
}
IndexRow row = new IndexRowImpl(schema);
// ordinal tracks the current column; the sequence below must match `schema` order
int ordinal = 0;
int taskMinMaxOrdinal = 1;
// get min max values for columns to be cached
byte[][] minValuesForColumnsToBeCached = BlockletIndexUtil.getMinMaxForColumnsToBeCached(segmentProperties, minMaxCacheColumns, minValues);
byte[][] maxValuesForColumnsToBeCached = BlockletIndexUtil.getMinMaxForColumnsToBeCached(segmentProperties, minMaxCacheColumns, maxValues);
boolean[] minMaxFlagValuesForColumnsToBeCached = BlockletIndexUtil.getMinMaxFlagValuesForColumnsToBeCached(segmentProperties, minMaxCacheColumns, minMaxFlag);
IndexRow indexRow = addMinMax(schema[ordinal], minValuesForColumnsToBeCached);
row.setRow(indexRow, ordinal);
// compute and set task level min values
addTaskMinMaxValues(summaryRow, taskSummarySchema, taskMinMaxOrdinal, minValuesForColumnsToBeCached, TASK_MIN_VALUES_INDEX, true, fileFooter.getColumnInTable());
ordinal++;
taskMinMaxOrdinal++;
row.setRow(addMinMax(schema[ordinal], maxValuesForColumnsToBeCached), ordinal);
// compute and set task level max values
addTaskMinMaxValues(summaryRow, taskSummarySchema, taskMinMaxOrdinal, maxValuesForColumnsToBeCached, TASK_MAX_VALUES_INDEX, false, fileFooter.getColumnInTable());
ordinal++;
// add total rows in one carbondata file
row.setInt((int) fileFooter.getNumberOfRows(), ordinal++);
// add file name
byte[] filePathBytes = CarbonTablePath.getCarbonDataFileName(filePath).getBytes(CarbonCommonConstants.DEFAULT_CHARSET_CLASS);
row.setByteArray(filePathBytes, ordinal++);
// add version number
row.setShort(fileFooter.getVersionId().number(), ordinal++);
// add schema updated time
row.setLong(fileFooter.getSchemaUpdatedTimeStamp(), ordinal++);
// add block offset
row.setLong(fileFooter.getBlockInfo().getBlockOffset(), ordinal++);
try {
setLocations(blockMetaInfo.getLocationInfo(), row, ordinal++);
// store block size
row.setLong(blockMetaInfo.getSize(), ordinal++);
// add min max flag for all the dimension columns
addMinMaxFlagValues(row, schema[ordinal], minMaxFlagValuesForColumnsToBeCached, ordinal);
memoryDMStore.addIndexRow(schema, row);
} catch (Exception e) {
// include the file path so the failing block can be identified from the log
String message = "Load to unsafe failed for block: " + filePath;
LOGGER.error(message, e);
throw new RuntimeException(message, e);
}
return summaryRow;
}
Use of org.apache.carbondata.core.indexstore.row.IndexRow in the Apache CarbonData project.
Class UnsafeMemoryDMStore, method addIndexRow.
/**
 * Add the index row to unsafe.
 * Below format is used to store data in memory block
 * WRITE:
 * <FD><FD><FD><VO><VO><VO><LO><VD><VD><VD>
 * FD: Fixed Column data
 * VO: Variable column data offset
 * VD: Variable column data
 * LO: Last Offset
 *
 * Read:
 * FD: Read directly based of byte position added in CarbonRowSchema
 *
 * VD: Read based on below logic
 * if not last variable column schema
 * X = read actual variable column offset based on byte position added in CarbonRowSchema
 * Y = read next variable column offset (next 4 bytes)
 * get the length
 * len = (Y - X)
 * read data from offset X of size len
 *
 * if last variable column
 * X = read actual variable column offset based on byte position added in CarbonRowSchema
 * Y = read last offset (next 4 bytes)
 * get the length
 * len = (Y - X)
 * read data from offset X of size len
 *
 * @param schema schema describing each column of the row (may contain STRUCT entries)
 * @param indexRow row to serialize into the unsafe memory block
 */
public void addIndexRow(CarbonRowSchema[] schema, IndexRow indexRow) {
// First calculate the required memory to keep the row in unsafe
int rowSize = indexRow.getTotalSizeInBytes();
// Check whether allocated memory is sufficient or not.
ensureSize(rowSize);
// pointer marks where this row starts inside the memory block
int pointer = runningLength;
// find the largest fixed-section byte position across all (possibly nested) schemas;
// the Last Offset slot is written immediately after it
int bytePosition = 0;
for (CarbonRowSchema carbonRowSchema : schema) {
if (carbonRowSchema.getSchemaType() == CarbonRowSchema.IndexSchemaType.STRUCT) {
CarbonRowSchema[] childSchemas = ((CarbonRowSchema.StructCarbonRowSchema) carbonRowSchema).getChildSchemas();
for (int j = 0; j < childSchemas.length; j++) {
if (childSchemas[j].getBytePosition() > bytePosition) {
bytePosition = childSchemas[j].getBytePosition();
}
}
} else {
if (carbonRowSchema.getBytePosition() > bytePosition) {
bytePosition = carbonRowSchema.getBytePosition();
}
}
}
// byte position of Last offset
bytePosition += CarbonCommonConstants.INT_SIZE_IN_BYTE;
// start byte position of variable length data
int varColPosition = bytePosition + CarbonCommonConstants.INT_SIZE_IN_BYTE;
// current position refers to current byte position in memory block
int currentPosition;
for (int i = 0; i < schema.length; i++) {
switch(schema[i].getSchemaType()) {
case STRUCT:
// nested row: write each child column in sequence
CarbonRowSchema[] childSchemas = ((CarbonRowSchema.StructCarbonRowSchema) schema[i]).getChildSchemas();
IndexRow row = indexRow.getRow(i);
for (int j = 0; j < childSchemas.length; j++) {
currentPosition = addToUnsafe(childSchemas[j], row, j, pointer, varColPosition);
// a positive return means variable-length data advanced the write position
if (currentPosition > 0) {
varColPosition = currentPosition;
}
}
break;
default:
currentPosition = addToUnsafe(schema[i], indexRow, i, pointer, varColPosition);
if (currentPosition > 0) {
varColPosition = currentPosition;
}
break;
}
}
// writing the last offset
getUnsafe().putInt(memoryBlock.getBaseObject(), memoryBlock.getBaseOffset() + pointer + bytePosition, varColPosition);
// after adding last offset increment the length by 4 bytes as last position
// written as INT
runningLength += CarbonCommonConstants.INT_SIZE_IN_BYTE;
// remember the row's start so it can be located by row index on read
pointers[rowCount++] = pointer;
}
Use of org.apache.carbondata.core.indexstore.row.IndexRow in the Apache CarbonData project.
Class CarbonUtil, method getMinMaxValue.
/**
 * Extracts the min/max byte arrays stored in the nested index row at the
 * given position of the parent row.
 *
 * @param row parent index row containing a nested min/max row
 * @param index position of the nested min/max row inside {@code row}
 * @return one byte array per column of the nested row
 */
public static byte[][] getMinMaxValue(IndexRow row, int index) {
    IndexRow nested = row.getRow(index);
    int columnCount = nested.getColumnCount();
    byte[][] values = new byte[columnCount][];
    int column = 0;
    while (column < columnCount) {
        values[column] = nested.getByteArray(column);
        column++;
    }
    return values;
}
Aggregations