Use of org.apache.carbondata.core.datastore.DataRefNode in the Apache CarbonData project.
From the class FilterExpressionProcessor, method getFilterredBlocks.
/**
 * Scans all btrees at segment level and selects the required block reference
 * nodes in order to push them to the executor for applying filters on the
 * respective data reference nodes.
 *
 * The following algorithm is used:
 * Step 1: Get the start/end key based on the filter tree resolver information.
 * Step 2: Prepare the IndexKeys in order to scan the tree and get the start
 * and end reference node (block).
 * Step 3: Once the data reference node range is retrieved, traverse the nodes
 * within this range and select each node based on the block min and max
 * values and the filter value.
 * Step 4: The selected blocks are sent to executors for applying the filters
 * with the help of filter executers.
 *
 * @param btreeNode       root data reference node of the segment btree
 * @param filterResolver  resolved filter tree used to derive the start/end keys
 * @param tableSegment    segment index providing the segment properties
 * @param tableIdentifier absolute identifier of the table (kept for API
 *                        compatibility; not referenced inside this method)
 * @return data reference nodes that survive min/max pruning; an empty list
 *         when default index keys cannot be generated
 */
public List<DataRefNode> getFilterredBlocks(DataRefNode btreeNode, FilterResolverIntf filterResolver, AbstractIndex tableSegment, AbsoluteTableIdentifier tableIdentifier) {
  // Need to get the current dimension tables
  List<DataRefNode> listOfDataBlocksToScan = new ArrayList<DataRefNode>();
  // getting the start and end index key based on filter for hitting the
  // selected block reference nodes based on filter resolver tree.
  if (LOGGER.isDebugEnabled()) {
    // guard avoids building the message when debug logging is off,
    // consistent with the overloaded variant of this method
    LOGGER.debug("preparing the start and end key for finding "
        + "start and end block as per filter resolver");
  }
  List<IndexKey> listOfStartEndKeys = new ArrayList<IndexKey>(2);
  FilterUtil.traverseResolverTreeAndGetStartAndEndKey(tableSegment.getSegmentProperties(), filterResolver, listOfStartEndKeys);
  // reading the first value from list which has start key
  IndexKey searchStartKey = listOfStartEndKeys.get(0);
  // reading the last value from list which has end key
  IndexKey searchEndKey = listOfStartEndKeys.get(1);
  // Prepare a default key for EACH key that is missing. The previous check
  // required both keys to be null, which caused a NullPointerException below
  // when the resolver produced only one of them.
  if (null == searchStartKey || null == searchEndKey) {
    try {
      // TODO need to handle for no dictionary dimensions
      if (null == searchStartKey) {
        searchStartKey = FilterUtil.prepareDefaultStartIndexKey(tableSegment.getSegmentProperties());
      }
      // TODO need to handle for no dictionary dimensions
      if (null == searchEndKey) {
        searchEndKey = FilterUtil.prepareDefaultEndIndexKey(tableSegment.getSegmentProperties());
      }
    } catch (KeyGenException e) {
      // best effort: without valid index keys no block can be selected
      return listOfDataBlocksToScan;
    }
  }
  if (LOGGER.isDebugEnabled()) {
    // NOTE(review): byte[] keys log as object identity here; the 3-arg
    // overload uses joinByteArray for readable output — consider aligning.
    LOGGER.debug("Successfully retrieved the start and end key "
        + "Dictionary Start Key: " + searchStartKey.getDictionaryKeys()
        + " No Dictionary Start Key " + searchStartKey.getNoDictionaryKeys()
        + " Dictionary End Key: " + searchEndKey.getDictionaryKeys()
        + " No Dictionary End Key " + searchEndKey.getNoDictionaryKeys());
  }
  long startTimeInMillis = System.currentTimeMillis();
  DataRefNodeFinder blockFinder = new BTreeDataRefNodeFinder(tableSegment.getSegmentProperties().getEachDimColumnValueSize(), tableSegment.getSegmentProperties().getNumberOfSortColumns(), tableSegment.getSegmentProperties().getNumberOfNoDictSortColumns());
  DataRefNode startBlock = blockFinder.findFirstDataBlock(btreeNode, searchStartKey);
  DataRefNode endBlock = blockFinder.findLastDataBlock(btreeNode, searchEndKey);
  FilterExecuter filterExecuter = FilterUtil.getFilterExecuterTree(filterResolver, tableSegment.getSegmentProperties(), null);
  // walk the range [startBlock, endBlock] and keep only blocks whose
  // min/max values can match the filter
  while (startBlock != endBlock) {
    addBlockBasedOnMinMaxValue(filterExecuter, listOfDataBlocksToScan, startBlock);
    startBlock = startBlock.getNextDataRefNode();
  }
  addBlockBasedOnMinMaxValue(filterExecuter, listOfDataBlocksToScan, endBlock);
  LOGGER.info("Total Time in retrieving the data reference node "
      + "after scanning the btree " + (System.currentTimeMillis() - startTimeInMillis)
      + " Total number of data reference node for executing filter(s) " + listOfDataBlocksToScan.size());
  return listOfDataBlocksToScan;
}
Use of org.apache.carbondata.core.datastore.DataRefNode in the Apache CarbonData project.
From the class CarbonInputFormat, method getDataBlocksOfSegment.
/**
 * Gets the data blocks of the given segment, optionally pruned by the filter
 * resolver and the matched-partitions bitset.
 *
 * When no resolver is supplied, all blocks of every task's btree index are
 * returned; otherwise only blocks that pass filter-based min/max pruning are
 * collected. The segment's cache access count is always released in the
 * finally block so the cache can evict entries on low-memory systems.
 */
private List<DataRefNode> getDataBlocksOfSegment(JobContext job, FilterExpressionProcessor filterExpressionProcessor, AbsoluteTableIdentifier absoluteTableIdentifier, FilterResolverIntf resolver, BitSet matchedPartitions, String segmentId, CacheClient cacheClient, SegmentUpdateStatusManager updateStatusManager) throws IOException {
Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> segmentIndexMap = null;
try {
// driver-side statistics recorder for this query
QueryStatisticsRecorder recorder = CarbonTimeStatisticsFactory.createDriverRecorder();
QueryStatistic statistic = new QueryStatistic();
// load (or fetch from cache) the per-task btree indexes of this segment
segmentIndexMap = getSegmentAbstractIndexs(job, absoluteTableIdentifier, segmentId, cacheClient, updateStatusManager);
List<DataRefNode> resultFilterredBlocks = new LinkedList<DataRefNode>();
if (null != segmentIndexMap) {
for (Map.Entry<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> entry : segmentIndexMap.entrySet()) {
SegmentTaskIndexStore.TaskBucketHolder taskHolder = entry.getKey();
// the task id doubles as the partition index in matchedPartitions
int taskId = CarbonTablePath.DataFileUtil.getTaskIdFromTaskNo(taskHolder.taskNo);
// if this partition is not required, here will skip it.
if (matchedPartitions == null || matchedPartitions.get(taskId)) {
AbstractIndex abstractIndex = entry.getValue();
List<DataRefNode> filterredBlocks;
// if no filter is given get all blocks from Btree Index
if (null == resolver) {
filterredBlocks = getDataBlocksOfIndex(abstractIndex);
} else {
// apply filter and get matching blocks
filterredBlocks = filterExpressionProcessor.getFilterredBlocks(abstractIndex.getDataRefNode(), resolver, abstractIndex, absoluteTableIdentifier);
}
resultFilterredBlocks.addAll(filterredBlocks);
}
}
}
// NOTE(review): records an absolute timestamp, not an elapsed duration —
// presumably the recorder derives the duration itself; confirm against
// QueryStatistic.addStatistics semantics.
statistic.addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_DRIVER, System.currentTimeMillis());
recorder.recordStatisticsForDriver(statistic, job.getConfiguration().get("query.id"));
return resultFilterredBlocks;
} finally {
// release the cache access count taken by getSegmentAbstractIndexs so on
// low memory systems the same memory can be utilized efficiently
if (null != segmentIndexMap) {
List<TableSegmentUniqueIdentifier> tableSegmentUniqueIdentifiers = new ArrayList<>(1);
tableSegmentUniqueIdentifiers.add(new TableSegmentUniqueIdentifier(absoluteTableIdentifier, segmentId));
cacheClient.getSegmentAccessClient().clearAccessCount(tableSegmentUniqueIdentifiers);
}
}
}
Use of org.apache.carbondata.core.datastore.DataRefNode in the Apache CarbonData project.
From the class BTreeBlockFinderTest, method testBtreeBuilderGetMethodIsGivingNotNullRootNode.
@Test
public void testBtreeBuilderGetMethodIsGivingNotNullRootNode() {
  // Build a block btree from sample file footers and verify that the
  // builder exposes a non-null root node afterwards.
  List<DataFileFooter> footers = getDataFileFooterList();
  BtreeBuilder btreeBuilder = new BlockBTreeBuilder();
  btreeBuilder.build(new BTreeBuilderInfo(footers, null));
  DataRefNode rootNode = btreeBuilder.get();
  assertTrue(rootNode != null);
}
Use of org.apache.carbondata.core.datastore.DataRefNode in the Apache CarbonData project.
From the class FilterExpressionProcessor, method getFilterredBlocks (three-argument overload).
/**
 * Scans all segment-level btrees and selects the block reference nodes that
 * must be handed to the executor for filter evaluation on the respective
 * data reference nodes.
 *
 * Algorithm:
 * Step 1: Prepare the default start and end IndexKeys for the segment.
 * Step 2: Locate the first and last data reference nodes (blocks) for that
 * key range in the btree.
 * Step 3: Traverse the nodes within this range and keep each node whose
 * block min/max values can match the filter value.
 * Step 4: The selected blocks are later sent to executors, which apply the
 * filters with the help of filter executers.
 *
 * @param btreeNode      root data reference node of the segment btree
 * @param filterResolver resolved filter used to build the filter executer tree
 * @param tableSegment   segment index providing the segment properties
 * @return data reference nodes selected for filter execution
 */
public List<DataRefNode> getFilterredBlocks(DataRefNode btreeNode, FilterResolverIntf filterResolver, AbstractIndex tableSegment) {
  // Blocks surviving min/max pruning; these are the ones executors will scan.
  List<DataRefNode> selectedBlocks = new ArrayList<DataRefNode>();
  if (LOGGER.isDebugEnabled()) {
    LOGGER.debug("preparing the start and end key for finding" + "start and end block as per filter resolver");
  }
  IndexKey startKey;
  IndexKey endKey;
  try {
    startKey = FilterUtil.prepareDefaultStartIndexKey(tableSegment.getSegmentProperties());
    endKey = FilterUtil.prepareDefaultEndIndexKey(tableSegment.getSegmentProperties());
  } catch (KeyGenException e) {
    // key generation failure is unrecoverable here; surface it to the caller
    throw new RuntimeException(e);
  }
  if (LOGGER.isDebugEnabled()) {
    char delimiter = ',';
    LOGGER.debug("Successfully retrieved the start and end key" + "Dictionary Start Key: " + joinByteArray(startKey.getDictionaryKeys(), delimiter) + "No Dictionary Start Key " + joinByteArray(startKey.getNoDictionaryKeys(), delimiter) + "Dictionary End Key: " + joinByteArray(endKey.getDictionaryKeys(), delimiter) + "No Dictionary End Key " + joinByteArray(endKey.getNoDictionaryKeys(), delimiter));
  }
  long scanStartMillis = System.currentTimeMillis();
  // The finder needs dimension column value sizes and sort-column counts to
  // navigate the btree.
  DataRefNodeFinder nodeFinder = new BTreeDataRefNodeFinder(tableSegment.getSegmentProperties().getEachDimColumnValueSize(), tableSegment.getSegmentProperties().getNumberOfSortColumns(), tableSegment.getSegmentProperties().getNumberOfNoDictSortColumns());
  DataRefNode firstNode = nodeFinder.findFirstDataBlock(btreeNode, startKey);
  DataRefNode lastNode = nodeFinder.findLastDataBlock(btreeNode, endKey);
  FilterExecuter executerTree = FilterUtil.getFilterExecuterTree(filterResolver, tableSegment.getSegmentProperties(), null);
  // Visit every node in [firstNode, lastNode] and apply min/max pruning.
  for (DataRefNode node = firstNode; node != lastNode; node = node.getNextDataRefNode()) {
    addBlockBasedOnMinMaxValue(executerTree, selectedBlocks, node);
  }
  addBlockBasedOnMinMaxValue(executerTree, selectedBlocks, lastNode);
  LOGGER.info("Total Time in retrieving the data reference node" + "after scanning the btree " + (System.currentTimeMillis() - scanStartMillis) + " Total number of data reference node for executing filter(s) " + selectedBlocks.size());
  return selectedBlocks;
}
Use of org.apache.carbondata.core.datastore.DataRefNode in the Apache CarbonData project.
From the class BlockletIterator, method next.
/**
 * Returns the current data block and advances the iterator to the next one.
 *
 * @return the data block the iterator was positioned at before this call
 */
@Override
public DataRefNode next() {
  // Hand back the node we are currently positioned on.
  DataRefNode current = datablock;
  // Advance to the following node in the linked chain.
  datablock = datablock.getNextDataRefNode();
  blockCounter++;
  // The iterator is exhausted once the chain ends or the configured number
  // of blocks has been scanned.
  if (datablock == null || blockCounter >= this.totalNumberOfBlocksToScan) {
    hasNext = false;
  }
  return current;
}
Aggregations