Example 31 with TableBlockInfo

use of org.apache.carbondata.core.datastore.block.TableBlockInfo in project carbondata by apache.

the class CarbonInputFormat method getSegmentAbstractIndexs.

/**
   * Returns the B-tree index for each task file in the given segment. The index is
   * looked up in the segment cache first and reloaded only when it is absent or the
   * segment has been updated since it was cached.
   * @param job job context
   * @param absoluteTableIdentifier identifier of the table being read
   * @param segmentId id of the segment whose indexes are required
   * @param cacheClient client used to look up and load cached segment indexes
   * @param updateStatusManager supplies the invalid timestamp range for updated segments
   * @return map from task/bucket holder to its loaded index
   * @throws IOException if the block infos or indexes cannot be read
   */
private Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> getSegmentAbstractIndexs(
        JobContext job, AbsoluteTableIdentifier absoluteTableIdentifier, String segmentId,
        CacheClient cacheClient, SegmentUpdateStatusManager updateStatusManager) throws IOException {
    Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> segmentIndexMap = null;
    SegmentTaskIndexWrapper segmentTaskIndexWrapper = null;
    boolean isSegmentUpdated = false;
    Set<SegmentTaskIndexStore.TaskBucketHolder> taskKeys = null;
    TableSegmentUniqueIdentifier tableSegmentUniqueIdentifier = new TableSegmentUniqueIdentifier(absoluteTableIdentifier, segmentId);
    segmentTaskIndexWrapper = cacheClient.getSegmentAccessClient().getIfPresent(tableSegmentUniqueIdentifier);
    UpdateVO updateDetails = updateStatusManager.getInvalidTimestampRange(segmentId);
    if (null != segmentTaskIndexWrapper) {
        segmentIndexMap = segmentTaskIndexWrapper.getTaskIdToTableSegmentMap();
        if (isSegmentUpdate(segmentTaskIndexWrapper, updateDetails)) {
            taskKeys = segmentIndexMap.keySet();
            isSegmentUpdated = true;
        }
    }
    // if segment tree is not loaded, load the segment tree
    if (segmentIndexMap == null || isSegmentUpdated) {
        // if the segment is updated, only the TableBlockInfo instances of the updated
        // blocks have to be retrieved. They are then filtered against taskKeys: if a
        // block belongs to a task whose btree is already loaded, it is not added again.
        Set<SegmentTaskIndexStore.TaskBucketHolder> validTaskKeys = new HashSet<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
        List<TableBlockInfo> tableBlockInfoList = getTableBlockInfo(job, tableSegmentUniqueIdentifier, taskKeys, updateStatusManager.getInvalidTimestampRange(segmentId), updateStatusManager, segmentId, validTaskKeys);
        if (!tableBlockInfoList.isEmpty()) {
            Map<String, List<TableBlockInfo>> segmentToTableBlocksInfos = new HashMap<>();
            segmentToTableBlocksInfos.put(segmentId, tableBlockInfoList);
            // get Btree blocks for given segment
            tableSegmentUniqueIdentifier.setSegmentToTableBlocksInfos(segmentToTableBlocksInfos);
            tableSegmentUniqueIdentifier.setIsSegmentUpdated(isSegmentUpdated);
            segmentTaskIndexWrapper = cacheClient.getSegmentAccessClient().get(tableSegmentUniqueIdentifier);
            segmentIndexMap = segmentTaskIndexWrapper.getTaskIdToTableSegmentMap();
        }
        if (null != taskKeys) {
            Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> finalMap = new HashMap<>(validTaskKeys.size());
            for (SegmentTaskIndexStore.TaskBucketHolder key : validTaskKeys) {
                finalMap.put(key, segmentIndexMap.get(key));
            }
            segmentIndexMap = finalMap;
        }
    }
    return segmentIndexMap;
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) UpdateVO(org.apache.carbondata.core.mutate.UpdateVO) TableSegmentUniqueIdentifier(org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier) SegmentTaskIndexWrapper(org.apache.carbondata.core.datastore.block.SegmentTaskIndexWrapper) AbstractIndex(org.apache.carbondata.core.datastore.block.AbstractIndex) SegmentTaskIndexStore(org.apache.carbondata.core.datastore.SegmentTaskIndexStore)
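
For readers outside the carbondata codebase, the look-up-then-reload pattern above can be reduced to a small standalone sketch. The class below is a hypothetical analogue using only the JDK: SegmentIndex stands in for SegmentTaskIndexWrapper/AbstractIndex, and loadIndexForTasks stands in for cacheClient.getSegmentAccessClient().get(...); none of these names come from carbondata.

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

// Hypothetical stand-in for SegmentTaskIndexWrapper / AbstractIndex.
class SegmentIndex {
    final long loadedAt;
    SegmentIndex(long loadedAt) { this.loadedAt = loadedAt; }
}

public class SegmentIndexCache {
    // segmentId -> (taskId -> index), analogous to the task-to-segment map above
    private final Map<String, Map<String, SegmentIndex>> cache = new ConcurrentHashMap<>();

    /** Returns the per-task index map for a segment, reloading when absent or stale. */
    public Map<String, SegmentIndex> getSegmentIndexes(String segmentId, long invalidatedAfter) {
        Map<String, SegmentIndex> indexMap = cache.get(segmentId);
        Set<String> staleTasks = null;
        if (indexMap != null) {
            // Collect tasks whose index predates the invalidation timestamp,
            // mirroring the isSegmentUpdate(...) check in the example above.
            staleTasks = new HashSet<>();
            for (Map.Entry<String, SegmentIndex> entry : indexMap.entrySet()) {
                if (entry.getValue().loadedAt < invalidatedAfter) {
                    staleTasks.add(entry.getKey());
                }
            }
        }
        if (indexMap == null || !staleTasks.isEmpty()) {
            // Reload; a real implementation would rebuild only the stale tasks
            // and keep the still-valid entries, as the carbondata code does.
            indexMap = loadIndexForTasks(segmentId, staleTasks);
            cache.put(segmentId, indexMap);
        }
        return indexMap;
    }

    // Hypothetical loader standing in for cacheClient.getSegmentAccessClient().get(...).
    private Map<String, SegmentIndex> loadIndexForTasks(String segmentId, Set<String> staleTasks) {
        Map<String, SegmentIndex> fresh = new HashMap<>();
        fresh.put(segmentId + "_task0", new SegmentIndex(System.currentTimeMillis()));
        return fresh;
    }
}

As in the original, a cached segment whose index is still valid never triggers a load; only a missing or invalidated segment does.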

Example 32 with TableBlockInfo

use of org.apache.carbondata.core.datastore.block.TableBlockInfo in project carbondata by apache.

the class CarbonRecordReader method initialize.

@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
    // The input split can contain a single HDFS block or multiple blocks, so first get
    // all the blocks and then set them in the query model.
    List<CarbonInputSplit> splitList;
    if (inputSplit instanceof CarbonInputSplit) {
        splitList = new ArrayList<>(1);
        splitList.add((CarbonInputSplit) inputSplit);
    } else if (inputSplit instanceof CarbonMultiBlockSplit) {
        // contains multiple blocks; this is an optimization for concurrent queries.
        CarbonMultiBlockSplit multiBlockSplit = (CarbonMultiBlockSplit) inputSplit;
        splitList = multiBlockSplit.getAllSplits();
    } else {
        throw new RuntimeException("unsupported input split type: " + inputSplit);
    }
    List<TableBlockInfo> tableBlockInfoList = CarbonInputSplit.createBlocks(splitList);
    queryModel.setTableBlockInfos(tableBlockInfoList);
    readSupport.initialize(queryModel.getProjectionColumns(), queryModel.getAbsoluteTableIdentifier());
    try {
        carbonIterator = new ChunkRowIterator(queryExecutor.execute(queryModel));
    } catch (QueryExecutionException e) {
        throw new InterruptedException(e.getMessage());
    }
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) ChunkRowIterator(org.apache.carbondata.core.scan.result.iterator.ChunkRowIterator) QueryExecutionException(org.apache.carbondata.core.scan.executor.exception.QueryExecutionException)
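
A reader initialized this way is driven through the standard org.apache.hadoop.mapreduce.RecordReader contract. The helper below is a hypothetical usage sketch (countRecords is not a carbondata API); it only assumes that CarbonRecordReader honors the usual initialize / nextKeyValue / getCurrentValue / close cycle.

import java.io.IOException;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class RecordReaderDriver {

    /** Drains any Hadoop record reader and returns the number of rows it produced. */
    public static <K, V> long countRecords(RecordReader<K, V> reader, InputSplit split,
            TaskAttemptContext context) throws IOException, InterruptedException {
        long count = 0;
        try {
            // Triggers the initialize(...) shown above: splits are normalized to
            // TableBlockInfo and the query executor is started.
            reader.initialize(split, context);
            while (reader.nextKeyValue()) {
                V row = reader.getCurrentValue(); // one carbon row per iteration
                count++;
            }
        } finally {
            reader.close();
        }
        return count;
    }
}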

Example 33 with TableBlockInfo

use of org.apache.carbondata.core.datastore.block.TableBlockInfo in project carbondata by apache.

the class InMemoryBTreeIndex method getTableBlockInfo.

/**
   * Below method will be used to get the table block info for the segment,
   * one entry per input split.
   *
   * @param job job context
   * @return list of table block infos
   * @throws IOException if the segment's splits cannot be read
   */
private List<TableBlockInfo> getTableBlockInfo(JobContext job) throws IOException {
    List<TableBlockInfo> tableBlockInfoList = new ArrayList<>();
    // identify table blocks from all file locations of given segment
    for (InputSplit inputSplit : segment.getAllSplits(job)) {
        CarbonInputSplit carbonInputSplit = (CarbonInputSplit) inputSplit;
        // scan the whole block: getNumberOfBlocklets() blocklets in total, starting at
        // blocklet 0 and reading all of them
        BlockletInfos blockletInfos = new BlockletInfos(carbonInputSplit.getNumberOfBlocklets(), 0, carbonInputSplit.getNumberOfBlocklets());
        tableBlockInfoList.add(new TableBlockInfo(carbonInputSplit.getPath().toString(), carbonInputSplit.getStart(), segment.getId(), carbonInputSplit.getLocations(), carbonInputSplit.getLength(), blockletInfos, carbonInputSplit.getVersion()));
    }
    return tableBlockInfoList;
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) BlockletInfos(org.apache.carbondata.core.datastore.block.BlockletInfos) ArrayList(java.util.ArrayList) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit) InputSplit(org.apache.hadoop.mapreduce.InputSplit)
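
Note the BlockletInfos(n, 0, n) arguments above: the block holds n blocklets, scanning starts at blocklet 0, and all n are read. The sketch below is a hypothetical illustration of how such a full blocklet range could be cut into contiguous sub-ranges (BlockletRange and partition are made-up names, and a Java 16+ record is assumed; this is not carbondata's own partitioning code).

import java.util.ArrayList;
import java.util.List;

public class BlockletPartitioner {

    /** Hypothetical analogue of BlockletInfos: a contiguous run of blocklets in one block. */
    record BlockletRange(int totalBlocklets, int start, int count) { }

    /** Splits [0, totalBlocklets) into at most maxTasks contiguous ranges. */
    static List<BlockletRange> partition(int totalBlocklets, int maxTasks) {
        List<BlockletRange> ranges = new ArrayList<>();
        int tasks = Math.min(maxTasks, Math.max(totalBlocklets, 1));
        int base = totalBlocklets / tasks;
        int remainder = totalBlocklets % tasks;
        int start = 0;
        for (int i = 0; i < tasks && start < totalBlocklets; i++) {
            int count = base + (i < remainder ? 1 : 0); // spread the leftover blocklets
            ranges.add(new BlockletRange(totalBlocklets, start, count));
            start += count;
        }
        return ranges;
    }
}

With totalBlocklets = 10 and maxTasks = 3 this yields ranges starting at 0, 4, and 7; the single-range case (n, 0, n) is exactly what the example above constructs.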

Aggregations

Types most frequently used together with TableBlockInfo across these examples, with usage counts:

TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo): 33 uses
ArrayList (java.util.ArrayList): 19 uses
Test (org.junit.Test): 11 uses
HashMap (java.util.HashMap): 10 uses
List (java.util.List): 9 uses
LinkedList (java.util.LinkedList): 7 uses
AbstractIndex (org.apache.carbondata.core.datastore.block.AbstractIndex): 7 uses
DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter): 7 uses
AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier): 6 uses
LinkedHashMap (java.util.LinkedHashMap): 5 uses
IOException (java.io.IOException): 4 uses
SegmentTaskIndexWrapper (org.apache.carbondata.core.datastore.block.SegmentTaskIndexWrapper): 4 uses
CarbonTableIdentifier (org.apache.carbondata.core.metadata.CarbonTableIdentifier): 4 uses
File (java.io.File): 3 uses
Map (java.util.Map): 3 uses
MockUp (mockit.MockUp): 3 uses
BlockInfo (org.apache.carbondata.core.datastore.block.BlockInfo): 3 uses
TableBlockUniqueIdentifier (org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier): 3 uses
SegmentInfo (org.apache.carbondata.core.metadata.blocklet.SegmentInfo): 3 uses
QueryExecutionException (org.apache.carbondata.core.scan.executor.exception.QueryExecutionException): 3 uses