Search in sources :

Example 1 with BlockBTreeLeafNode

use of org.apache.carbondata.core.datastore.impl.btree.BlockBTreeLeafNode in project carbondata by apache.

From the class InMemoryBTreeIndex, method filter:

/**
 * Collects the blocks of this segment that match the given filter.
 *
 * <p>Each node returned by {@code getDataBlocksOfSegment} is cast to
 * {@code BlockBTreeLeafNode}, and its {@code TableBlockInfo} is wrapped in a
 * new {@code CarbonInputSplit}.
 *
 * @param job    job context forwarded to the segment lookup
 * @param filter resolved filter forwarded to the segment lookup
 * @return one {@code CarbonInputSplit} per returned node, in iteration order
 * @throws IOException declared by the signature; presumably raised by the lookup
 */
@Override
public List<Block> filter(JobContext job, FilterResolverIntf filter) throws IOException {
    List<Block> matchedBlocks = new LinkedList<>();
    // Ask the BTree for the nodes of this segment that satisfy the filter.
    List<DataRefNode> matchingNodes = getDataBlocksOfSegment(
        job,
        new FilterExpressionProcessor(),
        AbsoluteTableIdentifier.from(segment.getPath(), "", ""),
        filter);
    for (DataRefNode node : matchingNodes) {
        // The code assumes every returned node is a block-level leaf.
        TableBlockInfo blockInfo = ((BlockBTreeLeafNode) node).getTableBlockInfo();
        matchedBlocks.add(new CarbonInputSplit(
            segment.getId(),
            blockInfo.getDetailInfo().getBlockletId().toString(),
            new Path(blockInfo.getFilePath()),
            blockInfo.getBlockOffset(),
            blockInfo.getBlockLength(),
            blockInfo.getLocations(),
            blockInfo.getBlockletInfos().getNoOfBlockLets(),
            blockInfo.getVersion(),
            null));
    }
    return matchedBlocks;
}
Also used : Path(org.apache.hadoop.fs.Path) TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) FilterExpressionProcessor(org.apache.carbondata.core.scan.filter.FilterExpressionProcessor) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) Block(org.apache.carbondata.hadoop.internal.index.Block) DataRefNode(org.apache.carbondata.core.datastore.DataRefNode) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit) LinkedList(java.util.LinkedList) BlockBTreeLeafNode(org.apache.carbondata.core.datastore.impl.btree.BlockBTreeLeafNode)

Example 2 with BlockBTreeLeafNode

use of org.apache.carbondata.core.datastore.impl.btree.BlockBTreeLeafNode in project carbondata by apache.

From the class CarbonInputFormat, method getSplits:

/**
   * Builds the input splits for every segment returned by
   * {@code getSegmentsToAccess(job)}.
   *
   * <p>For each segment, the data blocks matching the filter are looked up,
   * blocks that {@code CarbonUtil.isInvalidTableBlock} reports as invalid are
   * dropped, and each remaining block is wrapped in a {@code CarbonInputSplit}.
   *
   * <p>Per the original documentation, the configurations
   * FileInputFormat.INPUT_DIR and CarbonInputFormat.INPUT_SEGMENT_NUMBERS
   * are used to get the table path to read.
   *
   * @param job               job context; its configuration is used to resolve the table
   * @param filterResolver    resolved filter forwarded to the block lookup
   * @param matchedPartitions partition bit set forwarded to the block lookup
   * @param cacheClient       cache client forwarded to the block lookup
   * @return the splits of all valid matching blocks, in segment order
   * @throws IOException declared by the signature; presumably raised by the lookup
   */
private List<InputSplit> getSplits(JobContext job, FilterResolverIntf filterResolver, BitSet matchedPartitions, CacheClient cacheClient) throws IOException {
    List<InputSplit> splits = new LinkedList<InputSplit>();
    FilterExpressionProcessor processor = new FilterExpressionProcessor();
    AbsoluteTableIdentifier tableIdentifier =
        getCarbonTable(job.getConfiguration()).getAbsoluteTableIdentifier();
    SegmentUpdateStatusManager statusManager = new SegmentUpdateStatusManager(tableIdentifier);
    // For each segment, fetch the blocks matching the filter from the driver BTree.
    for (String segmentNo : getSegmentsToAccess(job)) {
        List<DataRefNode> matchingNodes = getDataBlocksOfSegment(
            job,
            processor,
            tableIdentifier,
            filterResolver,
            matchedPartitions,
            segmentNo,
            cacheClient,
            statusManager);
        for (DataRefNode node : matchingNodes) {
            TableBlockInfo blockInfo = ((BlockBTreeLeafNode) node).getTableBlockInfo();
            boolean invalid = CarbonUtil.isInvalidTableBlock(
                blockInfo,
                statusManager.getInvalidTimestampRange(blockInfo.getSegmentId()),
                statusManager);
            // Only blocks not reported as invalid become splits.
            if (!invalid) {
                splits.add(new CarbonInputSplit(
                    segmentNo,
                    new Path(blockInfo.getFilePath()),
                    blockInfo.getBlockOffset(),
                    blockInfo.getBlockLength(),
                    blockInfo.getLocations(),
                    blockInfo.getBlockletInfos().getNoOfBlockLets(),
                    blockInfo.getVersion()));
            }
        }
    }
    return splits;
}
Also used : Path(org.apache.hadoop.fs.Path) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) CarbonStorePath(org.apache.carbondata.core.util.path.CarbonStorePath) SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) DataRefNode(org.apache.carbondata.core.datastore.DataRefNode) FilterExpressionProcessor(org.apache.carbondata.core.scan.filter.FilterExpressionProcessor) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) InputSplit(org.apache.hadoop.mapreduce.InputSplit) BlockBTreeLeafNode(org.apache.carbondata.core.datastore.impl.btree.BlockBTreeLeafNode)

Aggregations

DataRefNode (org.apache.carbondata.core.datastore.DataRefNode): 2; TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo): 2; BlockBTreeLeafNode (org.apache.carbondata.core.datastore.impl.btree.BlockBTreeLeafNode): 2; AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier): 2; FilterExpressionProcessor (org.apache.carbondata.core.scan.filter.FilterExpressionProcessor): 2; Path (org.apache.hadoop.fs.Path): 2; LinkedList (java.util.LinkedList): 1; SegmentUpdateStatusManager (org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager): 1; CarbonStorePath (org.apache.carbondata.core.util.path.CarbonStorePath): 1; CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath): 1; CarbonInputSplit (org.apache.carbondata.hadoop.CarbonInputSplit): 1; Block (org.apache.carbondata.hadoop.internal.index.Block): 1; InputSplit (org.apache.hadoop.mapreduce.InputSplit): 1