Search in sources :

Example 11 with DataRefNode

use of org.apache.carbondata.core.datastore.DataRefNode in project carbondata by apache.

the class InMemoryBTreeIndex method filter.

/**
   * Builds one {@link CarbonInputSplit} per B-tree leaf block of this segment that is
   * returned by {@code getDataBlocksOfSegment} for the given filter.
   *
   * @param job    job context, forwarded unchanged to the block lookup
   * @param filter resolved filter, forwarded unchanged to the block lookup
   * @return the splits created for the returned blocks, in the order they were returned
   * @throws IOException propagated from the block lookup or from reading block details
   */
@Override
public List<Block> filter(JobContext job, FilterResolverIntf filter) throws IOException {
    // NOTE(review): the table identifier handed to the lookup is always null here;
    // confirm the callee is expected to cope with that.
    AbsoluteTableIdentifier identifier = null;
    FilterExpressionProcessor processor = new FilterExpressionProcessor();
    // for this segment fetch blocks matching filter in BTree
    List<DataRefNode> matchedNodes = getDataBlocksOfSegment(job, processor, identifier, filter);
    List<Block> splits = new LinkedList<>();
    for (DataRefNode node : matchedNodes) {
        // every returned node is treated as a block-level leaf of the B-tree
        TableBlockInfo info = ((BlockBTreeLeafNode) node).getTableBlockInfo();
        splits.add(new CarbonInputSplit(
            segment.getId(),
            new Path(info.getFilePath()),
            info.getBlockOffset(),
            info.getBlockLength(),
            info.getLocations(),
            info.getBlockletInfos().getNoOfBlockLets(),
            info.getVersion()));
    }
    return splits;
}
Also used : Path(org.apache.hadoop.fs.Path) TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) FilterExpressionProcessor(org.apache.carbondata.core.scan.filter.FilterExpressionProcessor) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) Block(org.apache.carbondata.hadoop.internal.index.Block) DataRefNode(org.apache.carbondata.core.datastore.DataRefNode) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit) LinkedList(java.util.LinkedList) BlockBTreeLeafNode(org.apache.carbondata.core.datastore.impl.btree.BlockBTreeLeafNode)

Example 12 with DataRefNode

use of org.apache.carbondata.core.datastore.DataRefNode in project carbondata by apache.

the class InMemoryBTreeIndex method getDataBlocksOfSegment.

/**
   * Gathers the data blocks of the segment from every index returned by
   * {@code getSegmentAbstractIndexs}.
   *
   * When {@code resolver} is null all blocks of each index are taken; otherwise only
   * the blocks returned by the filter processor for that index are taken. A driver
   * statistic is recorded and logged once the blocks have been collected.
   *
   * @param job                       job context used to load the segment indexes
   * @param filterExpressionProcessor processor applied when a filter is present
   * @param identifier                table identifier forwarded to the index load and the filter
   * @param resolver                  resolved filter, or null to select every block
   * @return the collected blocks, index by index
   * @throws IOException propagated from loading the segment indexes
   */
private List<DataRefNode> getDataBlocksOfSegment(JobContext job, FilterExpressionProcessor filterExpressionProcessor, AbsoluteTableIdentifier identifier, FilterResolverIntf resolver) throws IOException {
    QueryStatisticsRecorder recorder = CarbonTimeStatisticsFactory.createDriverRecorder();
    QueryStatistic statistic = new QueryStatistic();
    Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> indexesByTask = getSegmentAbstractIndexs(job, identifier);
    List<DataRefNode> collected = new LinkedList<DataRefNode>();
    for (AbstractIndex index : indexesByTask.values()) {
        // no filter: take every block of the index; otherwise only the matching ones
        List<DataRefNode> blocksOfIndex = (resolver == null)
            ? getDataBlocksOfIndex(index)
            : filterExpressionProcessor.getFilterredBlocks(index.getDataRefNode(), resolver, index, identifier);
        collected.addAll(blocksOfIndex);
    }
    statistic.addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_DRIVER, System.currentTimeMillis());
    recorder.recordStatistics(statistic);
    recorder.logStatistics();
    return collected;
}
Also used : AbstractIndex(org.apache.carbondata.core.datastore.block.AbstractIndex) DataRefNode(org.apache.carbondata.core.datastore.DataRefNode) QueryStatisticsRecorder(org.apache.carbondata.core.stats.QueryStatisticsRecorder) LinkedList(java.util.LinkedList) QueryStatistic(org.apache.carbondata.core.stats.QueryStatistic)

Example 13 with DataRefNode

use of org.apache.carbondata.core.datastore.DataRefNode in project carbondata by apache.

the class BlockLevelTraverser method getBlockRowMapping.

/**
   * Walks the leaf blocks of the given index, starting at the block found for the
   * default start key, and records the node size of every block that is not marked
   * invalid.
   *
   * @param abstractIndex       index whose blocks are traversed
   * @param blockRowMap         output map; receives one entry per accepted block, keyed by the
   *                            segment/block-name key, with the block's {@code nodeSize()} as value
   * @param segId               segment id used to build the map keys
   * @param updateStatusManager source of the per-block update details
   * @return the number of blocks that were added to {@code blockRowMap}
   * @throws KeyGenException if the default start key cannot be prepared
   */
public long getBlockRowMapping(AbstractIndex abstractIndex, Map<String, Long> blockRowMap, String segId, SegmentUpdateStatusManager updateStatusManager) throws KeyGenException {
    IndexKey defaultStartKey = FilterUtil.prepareDefaultStartIndexKey(abstractIndex.getSegmentProperties());
    DataRefNodeFinder finder = new BTreeDataRefNodeFinder(
        abstractIndex.getSegmentProperties().getEachDimColumnValueSize(),
        abstractIndex.getSegmentProperties().getNumberOfSortColumns(),
        abstractIndex.getSegmentProperties().getNumberOfNoDictSortColumns());
    long acceptedBlocks = 0;
    for (DataRefNode node = finder.findFirstDataBlock(abstractIndex.getDataRefNode(), defaultStartKey);
         node != null;
         node = node.getNextDataRefNode()) {
        // derive "<data file name><data extension>" from the block's file path
        String filePath = ((BlockBTreeLeafNode) node).getTableBlockInfo().getFilePath();
        String fileName = CarbonTablePath.getCarbonDataFileName(filePath) + CarbonTablePath.getCarbonDataExtension();
        long rowCount = node.nodeSize();
        String key = CarbonUpdateUtil.getSegmentBlockNameKey(segId, fileName);
        SegmentUpdateDetails details = updateStatusManager.getDetailsForABlock(key);
        // if block is invalid then dont add the count
        if (details != null && CarbonUpdateUtil.isBlockInvalid(details.getStatus())) {
            continue;
        }
        blockRowMap.put(key, rowCount);
        acceptedBlocks++;
    }
    return acceptedBlocks;
}
Also used : SegmentUpdateDetails(org.apache.carbondata.core.mutate.SegmentUpdateDetails) IndexKey(org.apache.carbondata.core.datastore.IndexKey) DataRefNode(org.apache.carbondata.core.datastore.DataRefNode) DataRefNodeFinder(org.apache.carbondata.core.datastore.DataRefNodeFinder) BTreeDataRefNodeFinder(org.apache.carbondata.core.datastore.impl.btree.BTreeDataRefNodeFinder)

Example 14 with DataRefNode

use of org.apache.carbondata.core.datastore.DataRefNode in project carbondata by apache.

the class CarbonInputFormat method getSplits.

/**
   * Creates the input splits for every segment returned by {@code getSegmentsToAccess}.
   *
   * For each segment the blocks returned by {@code getDataBlocksOfSegment} are turned
   * into {@link CarbonInputSplit}s; blocks that {@code CarbonUtil.isInvalidTableBlock}
   * reports as invalid are left out.
   *
   * @param job               job context; its configuration supplies the carbon table
   * @param filterResolver    resolved filter forwarded to the block lookup
   * @param matchedPartitions partitions forwarded to the block lookup
   * @param cacheClient       cache client forwarded to the block lookup
   * @return the splits of all accepted blocks, segment by segment
   * @throws IOException propagated from the table or block lookups
   */
private List<InputSplit> getSplits(JobContext job, FilterResolverIntf filterResolver, BitSet matchedPartitions, CacheClient cacheClient) throws IOException {
    List<InputSplit> splits = new LinkedList<InputSplit>();
    FilterExpressionProcessor processor = new FilterExpressionProcessor();
    AbsoluteTableIdentifier tableIdentifier = getCarbonTable(job.getConfiguration()).getAbsoluteTableIdentifier();
    SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(tableIdentifier);
    // for each segment fetch blocks matching filter in Driver BTree
    for (String segmentNo : getSegmentsToAccess(job)) {
        List<DataRefNode> segmentNodes = getDataBlocksOfSegment(job, processor, tableIdentifier, filterResolver, matchedPartitions, segmentNo, cacheClient, updateStatusManager);
        for (DataRefNode node : segmentNodes) {
            TableBlockInfo info = ((BlockBTreeLeafNode) node).getTableBlockInfo();
            boolean invalidBlock = CarbonUtil.isInvalidTableBlock(
                info,
                updateStatusManager.getInvalidTimestampRange(info.getSegmentId()),
                updateStatusManager);
            if (!invalidBlock) {
                splits.add(new CarbonInputSplit(
                    segmentNo,
                    new Path(info.getFilePath()),
                    info.getBlockOffset(),
                    info.getBlockLength(),
                    info.getLocations(),
                    info.getBlockletInfos().getNoOfBlockLets(),
                    info.getVersion()));
            }
        }
    }
    return splits;
}
Also used : Path(org.apache.hadoop.fs.Path) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) CarbonStorePath(org.apache.carbondata.core.util.path.CarbonStorePath) SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) DataRefNode(org.apache.carbondata.core.datastore.DataRefNode) FilterExpressionProcessor(org.apache.carbondata.core.scan.filter.FilterExpressionProcessor) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) InputSplit(org.apache.hadoop.mapreduce.InputSplit) BlockBTreeLeafNode(org.apache.carbondata.core.datastore.impl.btree.BlockBTreeLeafNode)

Example 15 with DataRefNode

use of org.apache.carbondata.core.datastore.DataRefNode in project carbondata by apache.

the class CarbonInputFormat method getDataBlocksOfIndex.

/**
   * get data blocks of given btree
   *
   * Looks up the first block for the default start key and the last block for the
   * default end key, then follows the next-node links from the first block up to and
   * including the last one.
   *
   * The walk is null-safe: it stops if the chain of next-node links ends before the
   * last block is reached, and a missing (null) last block is never added to the
   * result, so the returned list contains no null elements.
   *
   * @param abstractIndex index whose blocks are collected
   * @return the collected blocks; empty if the default keys could not be generated
   *         (the failure is logged) or if no block was found
   */
private List<DataRefNode> getDataBlocksOfIndex(AbstractIndex abstractIndex) {
    List<DataRefNode> blocks = new LinkedList<DataRefNode>();
    SegmentProperties segmentProperties = abstractIndex.getSegmentProperties();
    try {
        IndexKey startIndexKey = FilterUtil.prepareDefaultStartIndexKey(segmentProperties);
        IndexKey endIndexKey = FilterUtil.prepareDefaultEndIndexKey(segmentProperties);
        // Add all blocks of btree into result
        DataRefNodeFinder blockFinder = new BTreeDataRefNodeFinder(segmentProperties.getEachDimColumnValueSize(), segmentProperties.getNumberOfSortColumns(), segmentProperties.getNumberOfNoDictSortColumns());
        DataRefNode startBlock = blockFinder.findFirstDataBlock(abstractIndex.getDataRefNode(), startIndexKey);
        DataRefNode endBlock = blockFinder.findLastDataBlock(abstractIndex.getDataRefNode(), endIndexKey);
        // guard against the chain running out before endBlock is seen, which would
        // otherwise dereference null on the next iteration
        while (startBlock != null && startBlock != endBlock) {
            blocks.add(startBlock);
            startBlock = startBlock.getNextDataRefNode();
        }
        // never hand a null element to callers, which dereference every entry
        if (endBlock != null) {
            blocks.add(endBlock);
        }
    } catch (KeyGenException e) {
        LOG.error("Could not generate start key", e);
    }
    return blocks;
}
Also used : IndexKey(org.apache.carbondata.core.datastore.IndexKey) KeyGenException(org.apache.carbondata.core.keygenerator.KeyGenException) DataRefNode(org.apache.carbondata.core.datastore.DataRefNode) SegmentProperties(org.apache.carbondata.core.datastore.block.SegmentProperties) DataRefNodeFinder(org.apache.carbondata.core.datastore.DataRefNodeFinder) BTreeDataRefNodeFinder(org.apache.carbondata.core.datastore.impl.btree.BTreeDataRefNodeFinder)

Aggregations

DataRefNode (org.apache.carbondata.core.datastore.DataRefNode)16 DataRefNodeFinder (org.apache.carbondata.core.datastore.DataRefNodeFinder)10 IndexKey (org.apache.carbondata.core.datastore.IndexKey)9 BTreeBuilderInfo (org.apache.carbondata.core.datastore.BTreeBuilderInfo)6 BtreeBuilder (org.apache.carbondata.core.datastore.BtreeBuilder)6 DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter)6 Test (org.junit.Test)6 BTreeDataRefNodeFinder (org.apache.carbondata.core.datastore.impl.btree.BTreeDataRefNodeFinder)5 LinkedList (java.util.LinkedList)3 KeyGenException (org.apache.carbondata.core.keygenerator.KeyGenException)3 KeyGenerator (org.apache.carbondata.core.keygenerator.KeyGenerator)3 MultiDimKeyVarLengthGenerator (org.apache.carbondata.core.keygenerator.mdkey.MultiDimKeyVarLengthGenerator)3 ByteBuffer (java.nio.ByteBuffer)2 AbstractIndex (org.apache.carbondata.core.datastore.block.AbstractIndex)2 SegmentProperties (org.apache.carbondata.core.datastore.block.SegmentProperties)2 TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo)2 BlockBTreeLeafNode (org.apache.carbondata.core.datastore.impl.btree.BlockBTreeLeafNode)2 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)2 FilterExpressionProcessor (org.apache.carbondata.core.scan.filter.FilterExpressionProcessor)2 QueryStatistic (org.apache.carbondata.core.stats.QueryStatistic)2