Search in sources :

Example 1 with DataRefNode

use of org.apache.carbondata.core.datastore.DataRefNode in project carbondata by apache.

the class FilterExpressionProcessor method getFilterredBlocks.

/**
 * Scans the segment level btree and selects the data reference nodes (blocks)
 * that have to be handed to the executers for applying the filter.
 *
 * <p>Algorithm:
 * <ol>
 *   <li>Get the start and end key from the filter resolver tree.</li>
 *   <li>For every key the resolver tree did not provide, fall back to the
 *       default start/end index key of the segment.</li>
 *   <li>Find the first and last data block for that key range in the btree.</li>
 *   <li>Walk the blocks of that range and let {@code addBlockBasedOnMinMaxValue}
 *       decide, per block, whether it is added to the result.</li>
 * </ol>
 *
 * @param btreeNode        btree node handed to the block finder as the search root
 * @param filterResolver   resolved filter tree used for the keys and the filter executer
 * @param tableSegment     segment index whose segment properties describe the key layout
 * @param tableIdentifier  not used by this method
 * @return the blocks selected for filter execution, in btree traversal order
 * @throws RuntimeException if a default start or end key cannot be generated; the
 *                          originating {@code KeyGenException} is attached as the cause
 */
public List<DataRefNode> getFilterredBlocks(DataRefNode btreeNode, FilterResolverIntf filterResolver, AbstractIndex tableSegment, AbsoluteTableIdentifier tableIdentifier) {
    List<DataRefNode> listOfDataBlocksToScan = new ArrayList<DataRefNode>();
    // getting the start and end index key based on filter for hitting the
    // selected block reference nodes based on filter resolver tree.
    if (LOGGER.isDebugEnabled()) {
        LOGGER.debug("preparing the start and end key for finding" + "start and end block as per filter resolver");
    }
    List<IndexKey> listOfStartEndKeys = new ArrayList<IndexKey>(2);
    FilterUtil.traverseResolverTreeAndGetStartAndEndKey(tableSegment.getSegmentProperties(), filterResolver, listOfStartEndKeys);
    // first entry of the list is the start key, second entry is the end key
    IndexKey searchStartKey = listOfStartEndKeys.get(0);
    IndexKey searchEndKey = listOfStartEndKeys.get(1);
    try {
        // Default each key on its own: if only one of them is missing, the other
        // one must still be honoured and the missing one must not stay null,
        // because both keys are dereferenced below.
        // TODO need to handle for no dictionary dimensions
        if (null == searchStartKey) {
            searchStartKey = FilterUtil.prepareDefaultStartIndexKey(tableSegment.getSegmentProperties());
        }
        if (null == searchEndKey) {
            searchEndKey = FilterUtil.prepareDefaultEndIndexKey(tableSegment.getSegmentProperties());
        }
    } catch (KeyGenException e) {
        // Returning an empty list here would silently report "no matching data";
        // surface the failure instead and keep the cause.
        throw new RuntimeException("Unable to prepare the default start/end index key", e);
    }
    if (LOGGER.isDebugEnabled()) {
        // Arrays.toString prints the key contents; plain concatenation of an array
        // would only print its identity hash.
        LOGGER.debug("Successfully retrieved the start and end key" + "Dictionary Start Key: " + java.util.Arrays.toString(searchStartKey.getDictionaryKeys()) + "No Dictionary Start Key " + java.util.Arrays.toString(searchStartKey.getNoDictionaryKeys()) + "Dictionary End Key: " + java.util.Arrays.toString(searchEndKey.getDictionaryKeys()) + "No Dictionary End Key " + java.util.Arrays.toString(searchEndKey.getNoDictionaryKeys()));
    }
    long startTimeInMillis = System.currentTimeMillis();
    DataRefNodeFinder blockFinder = new BTreeDataRefNodeFinder(tableSegment.getSegmentProperties().getEachDimColumnValueSize(), tableSegment.getSegmentProperties().getNumberOfSortColumns(), tableSegment.getSegmentProperties().getNumberOfNoDictSortColumns());
    DataRefNode startBlock = blockFinder.findFirstDataBlock(btreeNode, searchStartKey);
    DataRefNode endBlock = blockFinder.findLastDataBlock(btreeNode, searchEndKey);
    FilterExecuter filterExecuter = FilterUtil.getFilterExecuterTree(filterResolver, tableSegment.getSegmentProperties(), null);
    // visit every block before the end block, then the end block itself
    while (startBlock != endBlock) {
        addBlockBasedOnMinMaxValue(filterExecuter, listOfDataBlocksToScan, startBlock);
        startBlock = startBlock.getNextDataRefNode();
    }
    addBlockBasedOnMinMaxValue(filterExecuter, listOfDataBlocksToScan, endBlock);
    LOGGER.info("Total Time in retrieving the data reference node" + "after scanning the btree " + (System.currentTimeMillis() - startTimeInMillis) + " Total number of data reference node for executing filter(s) " + listOfDataBlocksToScan.size());
    return listOfDataBlocksToScan;
}
Also used : IndexKey(org.apache.carbondata.core.datastore.IndexKey) ArrayList(java.util.ArrayList) KeyGenException(org.apache.carbondata.core.keygenerator.KeyGenException) DataRefNode(org.apache.carbondata.core.datastore.DataRefNode) BTreeDataRefNodeFinder(org.apache.carbondata.core.datastore.impl.btree.BTreeDataRefNodeFinder) DataRefNodeFinder(org.apache.carbondata.core.datastore.DataRefNodeFinder) FilterExecuter(org.apache.carbondata.core.scan.filter.executer.FilterExecuter) BTreeDataRefNodeFinder(org.apache.carbondata.core.datastore.impl.btree.BTreeDataRefNodeFinder)

Example 2 with DataRefNode

use of org.apache.carbondata.core.datastore.DataRefNode in project carbondata by apache.

the class CarbonInputFormat method getDataBlocksOfSegment.

/**
 * Collects the data blocks of one segment.
 *
 * <p>Every task index of the segment is visited. Tasks whose id is not set in
 * {@code matchedPartitions} are skipped (a {@code null} bit set means no task is
 * skipped). Without a resolver all blocks of the index are taken; otherwise the
 * blocks returned by the filter expression processor are taken. A driver
 * statistic is recorded before returning, and the access count of the segment is
 * cleared afterwards whenever the segment index map was obtained.
 */
private List<DataRefNode> getDataBlocksOfSegment(JobContext job, FilterExpressionProcessor filterExpressionProcessor, AbsoluteTableIdentifier absoluteTableIdentifier, FilterResolverIntf resolver, BitSet matchedPartitions, String segmentId, CacheClient cacheClient, SegmentUpdateStatusManager updateStatusManager) throws IOException {
    Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> taskIndexes = null;
    try {
        QueryStatisticsRecorder driverRecorder = CarbonTimeStatisticsFactory.createDriverRecorder();
        QueryStatistic loadStatistic = new QueryStatistic();
        taskIndexes = getSegmentAbstractIndexs(job, absoluteTableIdentifier, segmentId, cacheClient, updateStatusManager);
        List<DataRefNode> selectedBlocks = new LinkedList<DataRefNode>();
        if (taskIndexes != null) {
            for (Map.Entry<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> taskEntry : taskIndexes.entrySet()) {
                SegmentTaskIndexStore.TaskBucketHolder bucketHolder = taskEntry.getKey();
                int taskId = CarbonTablePath.DataFileUtil.getTaskIdFromTaskNo(bucketHolder.taskNo);
                // partitions that were not matched are not required, skip them
                if (matchedPartitions != null && !matchedPartitions.get(taskId)) {
                    continue;
                }
                AbstractIndex taskIndex = taskEntry.getValue();
                // no filter: every block of the btree index; filter: only matching blocks
                List<DataRefNode> blocksOfTask = (resolver == null)
                    ? getDataBlocksOfIndex(taskIndex)
                    : filterExpressionProcessor.getFilterredBlocks(taskIndex.getDataRefNode(), resolver, taskIndex, absoluteTableIdentifier);
                selectedBlocks.addAll(blocksOfTask);
            }
        }
        loadStatistic.addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_DRIVER, System.currentTimeMillis());
        String queryId = job.getConfiguration().get("query.id");
        driverRecorder.recordStatisticsForDriver(loadStatistic, queryId);
        return selectedBlocks;
    } finally {
        // clear the access count of the segment once the segment index map was
        // obtained, so that on low memory systems the same memory can be
        // utilized efficiently
        if (taskIndexes != null) {
            List<TableSegmentUniqueIdentifier> segmentsToRelease = new ArrayList<>(1);
            TableSegmentUniqueIdentifier segmentIdentifier = new TableSegmentUniqueIdentifier(absoluteTableIdentifier, segmentId);
            segmentsToRelease.add(segmentIdentifier);
            cacheClient.getSegmentAccessClient().clearAccessCount(segmentsToRelease);
        }
    }
}
Also used : DataRefNode(org.apache.carbondata.core.datastore.DataRefNode) TableSegmentUniqueIdentifier(org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier) AbstractIndex(org.apache.carbondata.core.datastore.block.AbstractIndex) QueryStatisticsRecorder(org.apache.carbondata.core.stats.QueryStatisticsRecorder) SegmentTaskIndexStore(org.apache.carbondata.core.datastore.SegmentTaskIndexStore) QueryStatistic(org.apache.carbondata.core.stats.QueryStatistic)

Example 3 with DataRefNode

use of org.apache.carbondata.core.datastore.DataRefNode in project carbondata by apache.

the class BTreeBlockFinderTest method testBtreeBuilderGetMethodIsGivingNotNullRootNode.

/**
 * Builds a block btree from the data file footer list and checks that the
 * builder hands back a non-null node.
 */
@Test
public void testBtreeBuilderGetMethodIsGivingNotNullRootNode() {
    BtreeBuilder treeBuilder = new BlockBTreeBuilder();
    BTreeBuilderInfo builderInfo = new BTreeBuilderInfo(getDataFileFooterList(), null);
    treeBuilder.build(builderInfo);
    DataRefNode rootNode = treeBuilder.get();
    assertTrue(null != rootNode);
}
Also used : DataFileFooter(org.apache.carbondata.core.metadata.blocklet.DataFileFooter) BTreeBuilderInfo(org.apache.carbondata.core.datastore.BTreeBuilderInfo) DataRefNode(org.apache.carbondata.core.datastore.DataRefNode) BtreeBuilder(org.apache.carbondata.core.datastore.BtreeBuilder) Test(org.junit.Test)

Example 4 with DataRefNode

use of org.apache.carbondata.core.datastore.DataRefNode in project carbondata by apache.

the class FilterExpressionProcessor method getFilterredBlocks.

/**
 * Selects, from the segment level btree, the data reference nodes (blocks) that
 * are handed to the executers for applying the filter.
 *
 * <p>The scan range is bounded by the default start and end index key of the
 * segment. The first and last block of that range are looked up in the btree,
 * and every block from the first up to and including the last is offered to
 * {@code addBlockBasedOnMinMaxValue}, which decides whether it is added to the
 * result.
 *
 * @param btreeNode      btree node handed to the block finder as the search root
 * @param filterResolver resolved filter tree used to build the filter executer
 * @param tableSegment   segment index providing the segment properties
 * @return the blocks selected for filter execution
 * @throws RuntimeException wrapping the {@code KeyGenException} raised while
 *                          preparing the default keys
 */
public List<DataRefNode> getFilterredBlocks(DataRefNode btreeNode, FilterResolverIntf filterResolver, AbstractIndex tableSegment) {
    List<DataRefNode> blocksToScan = new ArrayList<DataRefNode>();
    if (LOGGER.isDebugEnabled()) {
        LOGGER.debug("preparing the start and end key for finding" + "start and end block as per filter resolver");
    }
    final IndexKey rangeStartKey;
    final IndexKey rangeEndKey;
    try {
        rangeStartKey = FilterUtil.prepareDefaultStartIndexKey(tableSegment.getSegmentProperties());
        rangeEndKey = FilterUtil.prepareDefaultEndIndexKey(tableSegment.getSegmentProperties());
    } catch (KeyGenException e) {
        throw new RuntimeException(e);
    }
    if (LOGGER.isDebugEnabled()) {
        final char separator = ',';
        // render the four key arrays in the same order they appear in the message
        String startDictionaryKeys = joinByteArray(rangeStartKey.getDictionaryKeys(), separator);
        String startNoDictionaryKeys = joinByteArray(rangeStartKey.getNoDictionaryKeys(), separator);
        String endDictionaryKeys = joinByteArray(rangeEndKey.getDictionaryKeys(), separator);
        String endNoDictionaryKeys = joinByteArray(rangeEndKey.getNoDictionaryKeys(), separator);
        LOGGER.debug("Successfully retrieved the start and end key" + "Dictionary Start Key: " + startDictionaryKeys + "No Dictionary Start Key " + startNoDictionaryKeys + "Dictionary End Key: " + endDictionaryKeys + "No Dictionary End Key " + endNoDictionaryKeys);
    }
    long scanStartMillis = System.currentTimeMillis();
    DataRefNodeFinder nodeFinder = new BTreeDataRefNodeFinder(
        tableSegment.getSegmentProperties().getEachDimColumnValueSize(),
        tableSegment.getSegmentProperties().getNumberOfSortColumns(),
        tableSegment.getSegmentProperties().getNumberOfNoDictSortColumns());
    DataRefNode firstBlock = nodeFinder.findFirstDataBlock(btreeNode, rangeStartKey);
    DataRefNode lastBlock = nodeFinder.findLastDataBlock(btreeNode, rangeEndKey);
    FilterExecuter executer = FilterUtil.getFilterExecuterTree(filterResolver, tableSegment.getSegmentProperties(), null);
    // every block before the last one, then the last one itself
    for (DataRefNode cursor = firstBlock; cursor != lastBlock; cursor = cursor.getNextDataRefNode()) {
        addBlockBasedOnMinMaxValue(executer, blocksToScan, cursor);
    }
    addBlockBasedOnMinMaxValue(executer, blocksToScan, lastBlock);
    long elapsedMillis = System.currentTimeMillis() - scanStartMillis;
    LOGGER.info("Total Time in retrieving the data reference node" + "after scanning the btree " + elapsedMillis + " Total number of data reference node for executing filter(s) " + blocksToScan.size());
    return blocksToScan;
}
Also used : IndexKey(org.apache.carbondata.core.datastore.IndexKey) ArrayList(java.util.ArrayList) KeyGenException(org.apache.carbondata.core.keygenerator.KeyGenException) DataRefNode(org.apache.carbondata.core.datastore.DataRefNode) DataRefNodeFinder(org.apache.carbondata.core.datastore.DataRefNodeFinder) BTreeDataRefNodeFinder(org.apache.carbondata.core.datastore.impl.btree.BTreeDataRefNodeFinder) FilterExecuter(org.apache.carbondata.core.scan.filter.executer.FilterExecuter) BTreeDataRefNodeFinder(org.apache.carbondata.core.datastore.impl.btree.BTreeDataRefNodeFinder)

Example 5 with DataRefNode

use of org.apache.carbondata.core.datastore.DataRefNode in project carbondata by apache.

the class BlockletIterator method next.

/**
 * Returns the current data block and moves this iterator on to the block
 * that follows it.
 *
 * @return the data block that was current when the method was called
 */
@Override
public DataRefNode next() {
    // remember the block to hand out before advancing
    final DataRefNode current = datablock;
    datablock = current.getNextDataRefNode();
    blockCounter++;
    // nothing more to hand out when there is no following block, or when the
    // number of blocks to scan has been reached
    boolean exhausted = datablock == null || blockCounter >= this.totalNumberOfBlocksToScan;
    if (exhausted) {
        hasNext = false;
    }
    return current;
}
Also used : DataRefNode(org.apache.carbondata.core.datastore.DataRefNode)

Aggregations

DataRefNode (org.apache.carbondata.core.datastore.DataRefNode)17 DataRefNodeFinder (org.apache.carbondata.core.datastore.DataRefNodeFinder)11 IndexKey (org.apache.carbondata.core.datastore.IndexKey)10 BTreeBuilderInfo (org.apache.carbondata.core.datastore.BTreeBuilderInfo)6 BtreeBuilder (org.apache.carbondata.core.datastore.BtreeBuilder)6 BTreeDataRefNodeFinder (org.apache.carbondata.core.datastore.impl.btree.BTreeDataRefNodeFinder)6 DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter)6 Test (org.junit.Test)6 KeyGenException (org.apache.carbondata.core.keygenerator.KeyGenException)4 LinkedList (java.util.LinkedList)3 KeyGenerator (org.apache.carbondata.core.keygenerator.KeyGenerator)3 MultiDimKeyVarLengthGenerator (org.apache.carbondata.core.keygenerator.mdkey.MultiDimKeyVarLengthGenerator)3 ByteBuffer (java.nio.ByteBuffer)2 ArrayList (java.util.ArrayList)2 AbstractIndex (org.apache.carbondata.core.datastore.block.AbstractIndex)2 SegmentProperties (org.apache.carbondata.core.datastore.block.SegmentProperties)2 TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo)2 BlockBTreeLeafNode (org.apache.carbondata.core.datastore.impl.btree.BlockBTreeLeafNode)2 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)2 FilterExpressionProcessor (org.apache.carbondata.core.scan.filter.FilterExpressionProcessor)2