Use of org.apache.carbondata.core.datastore.impl.btree.BTreeDataRefNodeFinder in project carbondata by apache.
The class FilterExpressionProcessor, method getFilterredBlocks.
/**
 * This API scans all the btrees at the segment level and selects the required
 * block reference nodes in order to push them to the executor for applying filters
 * on the respective data reference nodes.
 * The following algorithm is used in this API:
 * Step 1: Get the start and end key based on the filter tree resolver information.
 * Step 2: Prepare the IndexKeys in order to scan the tree and get the start and end reference
 * nodes (blocks).
 * Step 3: Once the data reference node range is retrieved, traverse the nodes within this range
 * and select each node based on the block min and max values and the filter value.
 * Step 4: The selected blocks will be sent to the executors for applying the filters with the
 * help of FilterExecuter instances.
 */
public List<DataRefNode> getFilterredBlocks(DataRefNode btreeNode, FilterResolverIntf filterResolver, AbstractIndex tableSegment, AbsoluteTableIdentifier tableIdentifier) {
// Need to get the current dimension tables
List<DataRefNode> listOfDataBlocksToScan = new ArrayList<DataRefNode>();
// getting the start and end index key based on filter for hitting the
// selected block reference nodes based on filter resolver tree.
LOGGER.debug("preparing the start and end key for finding" + "start and end block as per filter resolver");
List<IndexKey> listOfStartEndKeys = new ArrayList<IndexKey>(2);
FilterUtil.traverseResolverTreeAndGetStartAndEndKey(tableSegment.getSegmentProperties(), filterResolver, listOfStartEndKeys);
// reading the first value from list which has start key
IndexKey searchStartKey = listOfStartEndKeys.get(0);
// reading the last value from list which has end key
IndexKey searchEndKey = listOfStartEndKeys.get(1);
if (null == searchStartKey && null == searchEndKey) {
try {
// TODO need to handle for no dictionary dimensions
searchStartKey = FilterUtil.prepareDefaultStartIndexKey(tableSegment.getSegmentProperties());
// TODO need to handle for no dictionary dimensions
searchEndKey = FilterUtil.prepareDefaultEndIndexKey(tableSegment.getSegmentProperties());
} catch (KeyGenException e) {
return listOfDataBlocksToScan;
}
}
LOGGER.debug("Successfully retrieved the start and end key" + "Dictionary Start Key: " + searchStartKey.getDictionaryKeys() + "No Dictionary Start Key " + searchStartKey.getNoDictionaryKeys() + "Dictionary End Key: " + searchEndKey.getDictionaryKeys() + "No Dictionary End Key " + searchEndKey.getNoDictionaryKeys());
long startTimeInMillis = System.currentTimeMillis();
DataRefNodeFinder blockFinder = new BTreeDataRefNodeFinder(tableSegment.getSegmentProperties().getEachDimColumnValueSize(), tableSegment.getSegmentProperties().getNumberOfSortColumns(), tableSegment.getSegmentProperties().getNumberOfNoDictSortColumns());
DataRefNode startBlock = blockFinder.findFirstDataBlock(btreeNode, searchStartKey);
DataRefNode endBlock = blockFinder.findLastDataBlock(btreeNode, searchEndKey);
FilterExecuter filterExecuter = FilterUtil.getFilterExecuterTree(filterResolver, tableSegment.getSegmentProperties(), null);
while (startBlock != endBlock) {
addBlockBasedOnMinMaxValue(filterExecuter, listOfDataBlocksToScan, startBlock);
startBlock = startBlock.getNextDataRefNode();
}
addBlockBasedOnMinMaxValue(filterExecuter, listOfDataBlocksToScan, endBlock);
LOGGER.info("Total Time in retrieving the data reference node" + "after scanning the btree " + (System.currentTimeMillis() - startTimeInMillis) + " Total number of data reference node for executing filter(s) " + listOfDataBlocksToScan.size());
return listOfDataBlocksToScan;
}
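For context, a minimal sketch of how this method might be invoked from query pruning code; the variable names below (segmentIndex, filterResolver, tableIdentifier) are illustrative assumptions, not identifiers taken from the snippet above.
// Hypothetical caller: prune the blocks of one segment with a resolved filter.
FilterExpressionProcessor filterProcessor = new FilterExpressionProcessor();
List<DataRefNode> prunedBlocks = filterProcessor.getFilterredBlocks(
    segmentIndex.getDataRefNode(),  // root btree node of the segment
    filterResolver,                 // resolver built from the filter expression tree
    segmentIndex,                   // AbstractIndex holding the segment properties
    tableIdentifier);               // AbsoluteTableIdentifier of the table
// Only blocks whose min/max range can satisfy the filter are returned, so
// downstream scanning is limited to prunedBlocks.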
Use of org.apache.carbondata.core.datastore.impl.btree.BTreeDataRefNodeFinder in project carbondata by apache.
The class CarbonTableReader, method getDataBlocksOfIndex.
/**
 * Get all data blocks of the given btree index.
 */
private List<DataRefNode> getDataBlocksOfIndex(AbstractIndex abstractIndex) {
List<DataRefNode> blocks = new LinkedList<DataRefNode>();
SegmentProperties segmentProperties = abstractIndex.getSegmentProperties();
try {
IndexKey startIndexKey = FilterUtil.prepareDefaultStartIndexKey(segmentProperties);
IndexKey endIndexKey = FilterUtil.prepareDefaultEndIndexKey(segmentProperties);
// Add all blocks of btree into result
DataRefNodeFinder blockFinder = new BTreeDataRefNodeFinder(segmentProperties.getEachDimColumnValueSize(), segmentProperties.getNumberOfSortColumns(), segmentProperties.getNumberOfNoDictSortColumns());
DataRefNode startBlock = blockFinder.findFirstDataBlock(abstractIndex.getDataRefNode(), startIndexKey);
DataRefNode endBlock = blockFinder.findLastDataBlock(abstractIndex.getDataRefNode(), endIndexKey);
while (startBlock != endBlock) {
blocks.add(startBlock);
startBlock = startBlock.getNextDataRefNode();
}
blocks.add(endBlock);
} catch (KeyGenException e) {
System.out.println("Could not generate start key: " + e.getMessage());
}
return blocks;
}
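The first/last lookup followed by a walk over getNextDataRefNode() is the same traversal idiom as in the previous snippet. A hedged sketch of that idiom extracted into a standalone helper; the helper itself is an illustration, not CarbonData API:
// Collect every node between the first and last data block, inclusive,
// by following the next pointers of the btree leaf chain.
private static List<DataRefNode> collectRange(DataRefNode first, DataRefNode last) {
    List<DataRefNode> nodes = new LinkedList<DataRefNode>();
    DataRefNode current = first;
    while (current != last) {
        nodes.add(current);
        current = current.getNextDataRefNode();
    }
    // the loop excludes the last node, so it is added explicitly
    nodes.add(last);
    return nodes;
}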
Use of org.apache.carbondata.core.datastore.impl.btree.BTreeDataRefNodeFinder in project carbondata by apache.
The class FilterExpressionProcessor, method getFilterredBlocks.
/**
 * This API scans all the btrees at the segment level and selects the required
 * block reference nodes in order to push them to the executor for applying filters
 * on the respective data reference nodes.
 * The following algorithm is used in this API:
 * Step 1: Get the start and end key based on the filter tree resolver information.
 * Step 2: Prepare the IndexKeys in order to scan the tree and get the start and end reference
 * nodes (blocks).
 * Step 3: Once the data reference node range is retrieved, traverse the nodes within this range
 * and select each node based on the block min and max values and the filter value.
 * Step 4: The selected blocks will be sent to the executors for applying the filters with the
 * help of FilterExecuter instances.
 */
public List<DataRefNode> getFilterredBlocks(DataRefNode btreeNode, FilterResolverIntf filterResolver, AbstractIndex tableSegment) {
// Need to get the current dimension tables
List<DataRefNode> listOfDataBlocksToScan = new ArrayList<DataRefNode>();
// prepare the start and end index keys used to select the block reference nodes.
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("preparing the start and end key for finding" + "start and end block as per filter resolver");
}
IndexKey searchStartKey = null;
IndexKey searchEndKey = null;
try {
searchStartKey = FilterUtil.prepareDefaultStartIndexKey(tableSegment.getSegmentProperties());
searchEndKey = FilterUtil.prepareDefaultEndIndexKey(tableSegment.getSegmentProperties());
} catch (KeyGenException e) {
throw new RuntimeException(e);
}
if (LOGGER.isDebugEnabled()) {
char delimiter = ',';
LOGGER.debug("Successfully retrieved the start and end key" + "Dictionary Start Key: " + joinByteArray(searchStartKey.getDictionaryKeys(), delimiter) + "No Dictionary Start Key " + joinByteArray(searchStartKey.getNoDictionaryKeys(), delimiter) + "Dictionary End Key: " + joinByteArray(searchEndKey.getDictionaryKeys(), delimiter) + "No Dictionary End Key " + joinByteArray(searchEndKey.getNoDictionaryKeys(), delimiter));
}
long startTimeInMillis = System.currentTimeMillis();
DataRefNodeFinder blockFinder = new BTreeDataRefNodeFinder(tableSegment.getSegmentProperties().getEachDimColumnValueSize(), tableSegment.getSegmentProperties().getNumberOfSortColumns(), tableSegment.getSegmentProperties().getNumberOfNoDictSortColumns());
DataRefNode startBlock = blockFinder.findFirstDataBlock(btreeNode, searchStartKey);
DataRefNode endBlock = blockFinder.findLastDataBlock(btreeNode, searchEndKey);
FilterExecuter filterExecuter = FilterUtil.getFilterExecuterTree(filterResolver, tableSegment.getSegmentProperties(), null);
while (startBlock != endBlock) {
addBlockBasedOnMinMaxValue(filterExecuter, listOfDataBlocksToScan, startBlock);
startBlock = startBlock.getNextDataRefNode();
}
addBlockBasedOnMinMaxValue(filterExecuter, listOfDataBlocksToScan, endBlock);
LOGGER.info("Total Time in retrieving the data reference node" + "after scanning the btree " + (System.currentTimeMillis() - startTimeInMillis) + " Total number of data reference node for executing filter(s) " + listOfDataBlocksToScan.size());
return listOfDataBlocksToScan;
}
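This version logs the keys through a private joinByteArray helper that is not shown in the snippet. A plausible implementation, offered purely as an assumption about its behavior:
// Assumed shape of the helper used above: render a byte array as a
// delimiter-separated string so the key bytes are readable in the log.
private String joinByteArray(byte[] bytes, char delimiter) {
    if (bytes == null) {
        return "null";
    }
    StringBuilder builder = new StringBuilder();
    for (int i = 0; i < bytes.length; i++) {
        if (i > 0) {
            builder.append(delimiter);
        }
        builder.append(bytes[i]);
    }
    return builder.toString();
}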
Use of org.apache.carbondata.core.datastore.impl.btree.BTreeDataRefNodeFinder in project carbondata by apache.
The class BlockLevelTraverser, method getBlockRowMapping.
/**
 * Builds a mapping from segment block name keys to the row counts of the valid blocks.
 *
 * @param abstractIndex index whose btree leaf nodes are traversed
 * @param blockRowMap map to be filled with block name key to row count entries
 * @param segId segment id used to build each block name key
 * @param updateStatusManager used to skip blocks invalidated by updates or deletes
 * @return the number of valid blocks added to the map
 * @throws KeyGenException if the default start index key cannot be generated
 */
public long getBlockRowMapping(AbstractIndex abstractIndex, Map<String, Long> blockRowMap, String segId, SegmentUpdateStatusManager updateStatusManager) throws KeyGenException {
IndexKey searchStartKey = FilterUtil.prepareDefaultStartIndexKey(abstractIndex.getSegmentProperties());
DataRefNodeFinder blockFinder = new BTreeDataRefNodeFinder(abstractIndex.getSegmentProperties().getEachDimColumnValueSize(), abstractIndex.getSegmentProperties().getNumberOfSortColumns(), abstractIndex.getSegmentProperties().getNumberOfNoDictSortColumns());
DataRefNode currentBlock = blockFinder.findFirstDataBlock(abstractIndex.getDataRefNode(), searchStartKey);
long count = 0;
while (currentBlock != null) {
String blockName = ((BlockBTreeLeafNode) currentBlock).getTableBlockInfo().getFilePath();
blockName = CarbonTablePath.getCarbonDataFileName(blockName);
blockName = blockName + CarbonTablePath.getCarbonDataExtension();
long rowCount = currentBlock.numRows();
String key = CarbonUpdateUtil.getSegmentBlockNameKey(segId, blockName);
// if the block is invalid then don't add its count
SegmentUpdateDetails details = updateStatusManager.getDetailsForABlock(key);
if (null == details || !CarbonUpdateUtil.isBlockInvalid(details.getSegmentStatus())) {
blockRowMap.put(key, rowCount);
count++;
}
currentBlock = currentBlock.getNextDataRefNode();
}
return count;
}
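A minimal usage sketch for this method; segmentIndex, segmentId, and updateStatusManager are hypothetical stand-ins supplied by the caller, and the no-arg constructor is an assumption.
// Hypothetical caller: count the valid rows per block of one segment.
Map<String, Long> blockRowMap = new HashMap<String, Long>();
long validBlockCount = new BlockLevelTraverser()  // assuming a no-arg constructor
    .getBlockRowMapping(segmentIndex, blockRowMap, segmentId, updateStatusManager);
// blockRowMap now holds one entry per valid block, keyed by
// CarbonUpdateUtil.getSegmentBlockNameKey(segId, blockName).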
Use of org.apache.carbondata.core.datastore.impl.btree.BTreeDataRefNodeFinder in project carbondata by apache.
The class AbstractDetailQueryResultIterator, method intialiseInfos.
private void intialiseInfos() {
for (BlockExecutionInfo blockInfo : blockExecutionInfos) {
Map<String, DeleteDeltaVo> deletedRowsMap = null;
DataRefNodeFinder finder = new BTreeDataRefNodeFinder(blockInfo.getEachColumnValueSize(), blockInfo.getDataBlock().getSegmentProperties().getNumberOfSortColumns(), blockInfo.getDataBlock().getSegmentProperties().getNumberOfNoDictSortColumns());
// if delete delta file is present
if (null != blockInfo.getDeleteDeltaFilePath() && 0 != blockInfo.getDeleteDeltaFilePath().length) {
DeleteDeltaInfo deleteDeltaInfo = new DeleteDeltaInfo(blockInfo.getDeleteDeltaFilePath());
// read and get the delete detail block details
deletedRowsMap = getDeleteDeltaDetails(blockInfo.getDataBlock(), deleteDeltaInfo);
// set the deleted row to block execution info
blockInfo.setDeletedRecordsMap(deletedRowsMap);
}
DataRefNode dataRefNode = blockInfo.getDataBlock().getDataRefNode();
if (dataRefNode instanceof BlockletDataRefNode) {
BlockletDataRefNode node = (BlockletDataRefNode) dataRefNode;
blockInfo.setFirstDataBlock(node);
blockInfo.setNumberOfBlockToScan(node.numberOfNodes());
} else {
DataRefNode startDataBlock = finder.findFirstDataBlock(dataRefNode, blockInfo.getStartKey());
while (startDataBlock.nodeIndex() < blockInfo.getStartBlockletIndex()) {
startDataBlock = startDataBlock.getNextDataRefNode();
}
long numberOfBlockToScan = blockInfo.getNumberOfBlockletToScan();
// if the number of blocklets to scan is not set (<= 0), derive it from the end block.
if (numberOfBlockToScan <= 0) {
DataRefNode endDataBlock = finder.findLastDataBlock(dataRefNode, blockInfo.getEndKey());
numberOfBlockToScan = endDataBlock.nodeIndex() - startDataBlock.nodeIndex() + 1;
}
blockInfo.setFirstDataBlock(startDataBlock);
blockInfo.setNumberOfBlockToScan(numberOfBlockToScan);
}
}
}
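Every call site above constructs the BTreeDataRefNodeFinder from the same three values. A condensed sketch of that repeated pattern, extracted as an illustrative (non-CarbonData) factory method:
// The three constructor arguments recur across all of the snippets above.
private static DataRefNodeFinder newBlockFinder(SegmentProperties properties) {
    return new BTreeDataRefNodeFinder(
        properties.getEachDimColumnValueSize(),      // byte width of each dimension column
        properties.getNumberOfSortColumns(),         // number of leading sort columns
        properties.getNumberOfNoDictSortColumns());  // how many of those are no-dictionary columns
}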