Use of org.apache.carbondata.core.indexstore.row.IndexRowImpl in project carbondata by apache.
The class BlockIndex, method loadBlockMetaInfo.
/**
* Method to load block metadata information.
*
* @param taskSummarySchema schema of the task summary row
* @param segmentProperties segment properties of the segment being loaded
* @param blockletIndexModel model containing the information required to load the index
* @param indexInfo list of data file footers to be loaded
* @return the populated task summary row
*/
private IndexRowImpl loadBlockMetaInfo(CarbonRowSchema[] taskSummarySchema, SegmentProperties segmentProperties, BlockletIndexModel blockletIndexModel, List<DataFileFooter> indexInfo) {
String tempFilePath = null;
DataFileFooter previousDataFileFooter = null;
int footerCounter = 0;
byte[][] blockMinValues = null;
byte[][] blockMaxValues = null;
IndexRowImpl summaryRow = null;
List<Short> blockletCountInEachBlock = new ArrayList<>(indexInfo.size());
short totalBlockletsInOneBlock = 0;
boolean isLastFileFooterEntryNeedToBeAdded = false;
CarbonRowSchema[] schema = getFileFooterEntrySchema();
// flag for each block entry
boolean[] minMaxFlag = new boolean[segmentProperties.getNumberOfColumns()];
Arrays.fill(minMaxFlag, true);
// min max flag for task summary
boolean[] taskSummaryMinMaxFlag = new boolean[segmentProperties.getNumberOfColumns()];
Arrays.fill(taskSummaryMinMaxFlag, true);
long totalRowCount = 0;
for (DataFileFooter fileFooter : indexInfo) {
TableBlockInfo blockInfo = fileFooter.getBlockInfo();
BlockMetaInfo blockMetaInfo = blockletIndexModel.getBlockMetaInfoMap().get(blockInfo.getFilePath());
footerCounter++;
if (blockMetaInfo != null) {
// first time a file path is encountered
if (null == tempFilePath) {
tempFilePath = blockInfo.getFilePath();
// 1st time assign the min and max values from the current file footer
blockMinValues = fileFooter.getBlockletIndex().getMinMaxIndex().getMinValues();
blockMaxValues = fileFooter.getBlockletIndex().getMinMaxIndex().getMaxValues();
updateMinMaxFlag(fileFooter, minMaxFlag);
updateMinMaxFlag(fileFooter, taskSummaryMinMaxFlag);
previousDataFileFooter = fileFooter;
totalBlockletsInOneBlock++;
} else if (blockInfo.getFilePath().equals(tempFilePath)) {
// After iterating over all the blocklets that belong to one block we need to compute the
// min and max at block level. So compare min and max values and update if required
BlockletMinMaxIndex currentFooterMinMaxIndex = fileFooter.getBlockletIndex().getMinMaxIndex();
blockMinValues = compareAndUpdateMinMax(currentFooterMinMaxIndex.getMinValues(), blockMinValues, true, fileFooter.getColumnInTable());
blockMaxValues = compareAndUpdateMinMax(currentFooterMinMaxIndex.getMaxValues(), blockMaxValues, false, fileFooter.getColumnInTable());
updateMinMaxFlag(fileFooter, minMaxFlag);
updateMinMaxFlag(fileFooter, taskSummaryMinMaxFlag);
totalBlockletsInOneBlock++;
}
// a row for the previous block has to be added whenever a new file path is encountered in the task. The OR condition handles the loading of the last file footer
if (!blockInfo.getFilePath().equals(tempFilePath) || footerCounter == indexInfo.size()) {
TableBlockInfo previousBlockInfo = previousDataFileFooter.getBlockInfo();
summaryRow = loadToUnsafeBlock(schema, taskSummarySchema, previousDataFileFooter, segmentProperties, getMinMaxCacheColumns(), previousBlockInfo.getFilePath(), summaryRow, blockletIndexModel.getBlockMetaInfoMap().get(previousBlockInfo.getFilePath()), blockMinValues, blockMaxValues, minMaxFlag);
totalRowCount += previousDataFileFooter.getNumberOfRows();
minMaxFlag = new boolean[segmentProperties.getNumberOfColumns()];
Arrays.fill(minMaxFlag, true);
// flag to check whether last file footer entry is different from previous entry.
// If yes then it needs to be added at the end
isLastFileFooterEntryNeedToBeAdded = (footerCounter == indexInfo.size()) && (!blockInfo.getFilePath().equals(tempFilePath));
// assign local variables values using the current file footer
tempFilePath = blockInfo.getFilePath();
blockMinValues = fileFooter.getBlockletIndex().getMinMaxIndex().getMinValues();
blockMaxValues = fileFooter.getBlockletIndex().getMinMaxIndex().getMaxValues();
updateMinMaxFlag(fileFooter, minMaxFlag);
updateMinMaxFlag(fileFooter, taskSummaryMinMaxFlag);
previousDataFileFooter = fileFooter;
blockletCountInEachBlock.add(totalBlockletsInOneBlock);
// for the next block the count will start from 1 because a row is created whenever a new file
// path is encountered. A new file path has just been encountered, so the count restarts from 1
totalBlockletsInOneBlock = 1;
}
}
}
// add the last file footer entry
if (isLastFileFooterEntryNeedToBeAdded) {
summaryRow = loadToUnsafeBlock(schema, taskSummarySchema, previousDataFileFooter, segmentProperties, getMinMaxCacheColumns(), previousDataFileFooter.getBlockInfo().getFilePath(), summaryRow, blockletIndexModel.getBlockMetaInfoMap().get(previousDataFileFooter.getBlockInfo().getFilePath()), blockMinValues, blockMaxValues, minMaxFlag);
totalRowCount += previousDataFileFooter.getNumberOfRows();
blockletCountInEachBlock.add(totalBlockletsInOneBlock);
}
byte[] blockletCount = convertRowCountFromShortToByteArray(blockletCountInEachBlock);
// set the total row count
summaryRow.setLong(totalRowCount, TASK_ROW_COUNT);
// blocklet count index is the last index
summaryRow.setByteArray(blockletCount, taskSummarySchema.length - 1);
setMinMaxFlagForTaskSummary(summaryRow, taskSummarySchema, segmentProperties, taskSummaryMinMaxFlag);
return summaryRow;
}
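The merge comments above (compare min and max values and update if required) describe how per-blocklet values are folded into block level values through compareAndUpdateMinMax. Below is a small self-contained sketch of that folding idea; the unsigned byte-wise comparison is a simplified stand-in chosen for illustration, not the comparator CarbonData actually applies.
import java.util.Arrays;

// Toy illustration of merging per-blocklet min/max byte arrays into block level values.
public class MinMaxMergeSketch {

  // Unsigned lexicographic comparison of two byte arrays.
  private static int compareUnsigned(byte[] a, byte[] b) {
    int len = Math.min(a.length, b.length);
    for (int i = 0; i < len; i++) {
      int diff = (a[i] & 0xFF) - (b[i] & 0xFF);
      if (diff != 0) {
        return diff;
      }
    }
    return a.length - b.length;
  }

  // Keep the smaller value per column when merging mins, the larger when merging maxs.
  static byte[][] merge(byte[][] current, byte[][] accumulated, boolean isMin) {
    byte[][] result = new byte[accumulated.length][];
    for (int i = 0; i < accumulated.length; i++) {
      int cmp = compareUnsigned(current[i], accumulated[i]);
      boolean takeCurrent = isMin ? cmp < 0 : cmp > 0;
      result[i] = takeCurrent ? current[i] : accumulated[i];
    }
    return result;
  }

  public static void main(String[] args) {
    byte[][] blockMin = {{5}, {10}};
    byte[][] blockletMin = {{3}, {12}};
    // column 0 takes the new smaller value, column 1 keeps the existing one
    System.out.println(Arrays.deepToString(merge(blockletMin, blockMin, true)));
  }
}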
Use of org.apache.carbondata.core.indexstore.row.IndexRowImpl in project carbondata by apache.
The class BlockletIndex, method loadBlockletMetaInfo.
/**
* Method to load blocklet metadata information.
*
* @param taskSummarySchema schema of the task summary row
* @param segmentProperties segment properties of the segment being loaded
* @param blockletIndexModel model containing the information required to load the index
* @param indexInfo list of data file footers to be loaded
* @return the populated task summary row
*/
private IndexRowImpl loadBlockletMetaInfo(CarbonRowSchema[] taskSummarySchema, SegmentProperties segmentProperties, BlockletIndexModel blockletIndexModel, List<DataFileFooter> indexInfo) {
String tempFilePath = null;
IndexRowImpl summaryRow = null;
CarbonRowSchema[] schema = getFileFooterEntrySchema();
boolean[] summaryRowMinMaxFlag = new boolean[segmentProperties.getNumberOfColumns()];
Arrays.fill(summaryRowMinMaxFlag, true);
// Relative blocklet ID is the id assigned to a blocklet within a part file
int relativeBlockletId = 0;
for (DataFileFooter fileFooter : indexInfo) {
// update the min max flag for summary row
updateMinMaxFlag(fileFooter, summaryRowMinMaxFlag);
TableBlockInfo blockInfo = fileFooter.getBlockInfo();
BlockMetaInfo blockMetaInfo = blockletIndexModel.getBlockMetaInfoMap().get(blockInfo.getFilePath());
// proceed only if the file exists physically
if (blockMetaInfo != null) {
// blocklet ID will start from 0 again only when part file path is changed
if (null == tempFilePath || !tempFilePath.equals(blockInfo.getFilePath())) {
tempFilePath = blockInfo.getFilePath();
relativeBlockletId = 0;
blockNum++;
}
summaryRow = loadToUnsafe(schema, taskSummarySchema, fileFooter, segmentProperties, getMinMaxCacheColumns(), blockInfo.getFilePath(), summaryRow, blockMetaInfo, relativeBlockletId);
// this is done because the relative blocklet id needs to be incremented based on the
// total number of blocklets
relativeBlockletId += fileFooter.getBlockletList().size();
}
}
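// unlike loadBlockMetaInfo above, this flow does not accumulate a total row count, so the task level row count is stored as 0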
summaryRow.setLong(0L, TASK_ROW_COUNT);
setMinMaxFlagForTaskSummary(summaryRow, taskSummarySchema, segmentProperties, summaryRowMinMaxFlag);
return summaryRow;
}
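The relative blocklet id bookkeeping above resets the counter whenever the part file path changes and otherwise advances it by the number of blocklets covered by each footer. A runnable toy sketch of that counter logic, with hypothetical file names and blocklet counts standing in for real footers:
import java.util.Arrays;
import java.util.List;

public class RelativeBlockletIdSketch {
  public static void main(String[] args) {
    // Hypothetical footers: each entry is a part file path and the number of blocklets it covers.
    List<String[]> footers = Arrays.asList(
        new String[] {"part-0001.carbondata", "3"},
        new String[] {"part-0001.carbondata", "2"},
        new String[] {"part-0002.carbondata", "4"});
    String tempFilePath = null;
    int relativeBlockletId = 0;
    for (String[] footer : footers) {
      String filePath = footer[0];
      int blockletCount = Integer.parseInt(footer[1]);
      // blocklet id restarts from 0 only when the part file path changes
      if (tempFilePath == null || !tempFilePath.equals(filePath)) {
        tempFilePath = filePath;
        relativeBlockletId = 0;
      }
      System.out.println(filePath + " -> first blocklet id " + relativeBlockletId);
      // advance by the number of blocklets covered by this footer
      relativeBlockletId += blockletCount;
    }
  }
}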
Use of org.apache.carbondata.core.indexstore.row.IndexRowImpl in project carbondata by apache.
The class BlockIndex, method addMinMaxFlagValues.
protected void addMinMaxFlagValues(IndexRow row, CarbonRowSchema carbonRowSchema, boolean[] minMaxFlag, int ordinal) {
CarbonRowSchema[] minMaxFlagSchema = ((CarbonRowSchema.StructCarbonRowSchema) carbonRowSchema).getChildSchemas();
IndexRow minMaxFlagRow = new IndexRowImpl(minMaxFlagSchema);
int flagOrdinal = 0;
// add the min max flag value for each column
for (int i = 0; i < minMaxFlag.length; i++) {
minMaxFlagRow.setBoolean(minMaxFlag[i], flagOrdinal++);
}
row.setRow(minMaxFlagRow, ordinal);
}
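addMinMaxFlagValues builds a nested IndexRowImpl from the struct schema's child schemas, writes one boolean per indexed column, and attaches the result to the parent row with setRow. The sketch below restates that call pattern as a standalone helper; the import path for CarbonRowSchema and the idea of receiving the parent row and ordinal from the caller are assumptions made for illustration.
import org.apache.carbondata.core.indexstore.row.IndexRow;
import org.apache.carbondata.core.indexstore.row.IndexRowImpl;
import org.apache.carbondata.core.indexstore.schema.CarbonRowSchema;

// Illustrative helper mirroring the pattern above; the struct schema, parent row
// and ordinal are assumed to be supplied by the surrounding index builder.
final class MinMaxFlagRowSketch {
  static void writeFlags(IndexRow parentRow, CarbonRowSchema structSchema, boolean[] flags, int ordinal) {
    CarbonRowSchema[] childSchemas = ((CarbonRowSchema.StructCarbonRowSchema) structSchema).getChildSchemas();
    IndexRow flagRow = new IndexRowImpl(childSchemas);
    for (int i = 0; i < flags.length; i++) {
      // one boolean flag per indexed column, written in schema order
      flagRow.setBoolean(flags[i], i);
    }
    // attach the nested flag row at the struct column's ordinal in the parent row
    parentRow.setRow(flagRow, ordinal);
  }
}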
Use of org.apache.carbondata.core.indexstore.row.IndexRowImpl in project carbondata by apache.
The class BlockIndex, method init.
@Override
public void init(IndexModel indexModel) throws IOException {
long startTime = System.currentTimeMillis();
assert (indexModel instanceof BlockletIndexModel);
BlockletIndexModel blockletIndexModel = (BlockletIndexModel) indexModel;
DataFileFooterConverter fileFooterConverter = new DataFileFooterConverter(indexModel.getConfiguration());
List<DataFileFooter> indexInfo = null;
if (blockletIndexModel.getIndexInfos() == null || blockletIndexModel.getIndexInfos().isEmpty()) {
indexInfo = fileFooterConverter.getIndexInfo(blockletIndexModel.getFilePath(), blockletIndexModel.getFileData(), blockletIndexModel.getCarbonTable().isTransactionalTable());
} else {
// when index info is already read and converted to data file footer object
indexInfo = blockletIndexModel.getIndexInfos();
}
String path = blockletIndexModel.getFilePath();
// store file path only in case of partition table, non transactional table and flat folder
// structure
byte[] filePath;
this.isPartitionTable = blockletIndexModel.getCarbonTable().isHivePartitionTable();
if (this.isPartitionTable || !blockletIndexModel.getCarbonTable().isTransactionalTable() || blockletIndexModel.getCarbonTable().isSupportFlatFolder()
// if the segment data is written in table path then no need to store whole path of file
|| !blockletIndexModel.getFilePath().startsWith(blockletIndexModel.getCarbonTable().getTablePath())) {
filePath = FilenameUtils.getFullPathNoEndSeparator(path).getBytes(CarbonCommonConstants.DEFAULT_CHARSET);
isFilePathStored = true;
} else {
filePath = new byte[0];
}
byte[] fileName = path.substring(path.lastIndexOf("/") + 1).getBytes(CarbonCommonConstants.DEFAULT_CHARSET);
byte[] segmentId = blockletIndexModel.getSegmentId().getBytes(CarbonCommonConstants.DEFAULT_CHARSET);
if (!indexInfo.isEmpty()) {
DataFileFooter fileFooter = indexInfo.get(0);
// init segment properties and create schema
SegmentProperties segmentProperties = initSegmentProperties(blockletIndexModel, fileFooter);
createMemorySchema(blockletIndexModel);
createSummaryDMStore(blockletIndexModel);
CarbonRowSchema[] taskSummarySchema = getTaskSummarySchema();
// check for legacy store and load the metadata
IndexRowImpl summaryRow = loadMetadata(taskSummarySchema, segmentProperties, blockletIndexModel, indexInfo);
finishWriting(taskSummarySchema, filePath, fileName, segmentId, summaryRow);
if (((BlockletIndexModel) indexModel).isSerializeDmStore()) {
serializeDmStore();
}
}
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("Time taken to load blocklet index from file : " + indexModel.getFilePath() + " is " + (System.currentTimeMillis() - startTime));
}
}
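init stores the directory part of the index file path only when the whole path must be kept, while the file name is always stored separately. A small runnable sketch of that split, using a made-up path and assuming CarbonCommonConstants.DEFAULT_CHARSET resolves to UTF-8:
import java.nio.charset.StandardCharsets;
import org.apache.commons.io.FilenameUtils;

public class IndexFilePathSplitSketch {
  public static void main(String[] args) {
    // Hypothetical index file path, used only to illustrate the split done in init()
    String path = "/store/db/tbl/Fact/Part0/Segment_0/part-0-0_batchno0-0-0-1.carbonindex";
    // directory portion, stored only when the whole path has to be kept (e.g. partition tables)
    byte[] filePath = FilenameUtils.getFullPathNoEndSeparator(path).getBytes(StandardCharsets.UTF_8);
    // file name portion, always stored
    byte[] fileName = path.substring(path.lastIndexOf("/") + 1).getBytes(StandardCharsets.UTF_8);
    System.out.println(new String(filePath, StandardCharsets.UTF_8));
    System.out.println(new String(fileName, StandardCharsets.UTF_8));
  }
}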
Use of org.apache.carbondata.core.indexstore.row.IndexRowImpl in project carbondata by apache.
The class BlockIndex, method addMinMax.
protected IndexRow addMinMax(CarbonRowSchema carbonRowSchema, byte[][] minValues) {
CarbonRowSchema[] minSchemas = ((CarbonRowSchema.StructCarbonRowSchema) carbonRowSchema).getChildSchemas();
IndexRow minRow = new IndexRowImpl(minSchemas);
int minOrdinal = 0;
// min value adding
for (int i = 0; i < minValues.length; i++) {
minRow.setByteArray(minValues[i], minOrdinal++);
}
return minRow;
}
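addMinMax expects one byte array per indexed column. The runnable sketch below only shows the shape of that byte[][] input; the concrete byte layouts (big-endian int, UTF-8 string) are illustrative assumptions, since CarbonData produces these arrays with its own encoders.
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

public class MinValuesShapeSketch {
  public static void main(String[] args) {
    // One byte array per indexed column; the encodings below are illustrative only.
    byte[][] minValues = new byte[][] {
        ByteBuffer.allocate(4).putInt(42).array(),   // stand-in for a numeric column
        "aaa".getBytes(StandardCharsets.UTF_8)       // stand-in for a string column
    };
    System.out.println("columns: " + minValues.length);
    System.out.println(Arrays.toString(minValues[0]));
    System.out.println(Arrays.toString(minValues[1]));
  }
}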