use of org.apache.carbondata.core.datastore.block.TableBlockInfo in project carbondata by apache.
the class CarbonInputFormat method getSegmentAbstractIndexs.
/**
 * Returns the per-task index map for one segment, consulting the segment cache first and
 * loading through the cache client when nothing usable is cached.
 *
 * <p>Flow, as implemented below:
 * <ol>
 *   <li>Look the segment up in the cache without loading it ({@code getIfPresent}).</li>
 *   <li>If a cached wrapper exists and {@code isSegmentUpdate} reports no update, its map is
 *       returned unchanged.</li>
 *   <li>Otherwise the table blocks are collected via {@code getTableBlockInfo}; when that list
 *       is non-empty the segment is (re)loaded through the cache client.</li>
 *   <li>For an updated segment the result is narrowed to the task keys that
 *       {@code getTableBlockInfo} placed into the valid-key set.</li>
 * </ol>
 *
 * @param job                     job context handed to {@code getTableBlockInfo}
 * @param absoluteTableIdentifier identifier of the table that owns the segment
 * @param segmentId               id of the segment whose indexes are requested
 * @param cacheClient             client giving access to the segment cache
 * @param updateStatusManager     source of the invalid timestamp range for the segment
 * @return map from task/bucket holder to its index; may be {@code null} when nothing was
 *         cached and no table blocks were found for the segment
 * @throws IOException declared for the block lookup and cache load performed here
 */
private Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> getSegmentAbstractIndexs(
    JobContext job, AbsoluteTableIdentifier absoluteTableIdentifier, String segmentId,
    CacheClient cacheClient, SegmentUpdateStatusManager updateStatusManager) throws IOException {
  TableSegmentUniqueIdentifier segmentIdentifier =
      new TableSegmentUniqueIdentifier(absoluteTableIdentifier, segmentId);
  SegmentTaskIndexWrapper indexWrapper =
      cacheClient.getSegmentAccessClient().getIfPresent(segmentIdentifier);
  UpdateVO invalidRange = updateStatusManager.getInvalidTimestampRange(segmentId);

  Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> indexByTask = null;
  Set<SegmentTaskIndexStore.TaskBucketHolder> cachedTaskKeys = null;
  boolean segmentUpdated = false;
  if (indexWrapper != null) {
    indexByTask = indexWrapper.getTaskIdToTableSegmentMap();
    if (isSegmentUpdate(indexWrapper, invalidRange)) {
      // remember which tasks are already cached so the block lookup can filter on them
      cachedTaskKeys = indexByTask.keySet();
      segmentUpdated = true;
    }
  }

  // cached and not updated: nothing has to be loaded
  if (indexByTask != null && !segmentUpdated) {
    return indexByTask;
  }

  // The segment tree is either not loaded yet or the segment was updated. For an updated
  // segment only the updated blocks are fetched; they are filtered against cachedTaskKeys so
  // that a task whose btree is already loaded is not added again.
  Set<SegmentTaskIndexStore.TaskBucketHolder> validTaskKeys =
      new HashSet<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
  List<TableBlockInfo> blocksToLoad = getTableBlockInfo(job, segmentIdentifier, cachedTaskKeys,
      updateStatusManager.getInvalidTimestampRange(segmentId), updateStatusManager, segmentId,
      validTaskKeys);

  if (!blocksToLoad.isEmpty()) {
    Map<String, List<TableBlockInfo>> blocksBySegment = new HashMap<>();
    blocksBySegment.put(segmentId, blocksToLoad);
    // hand the blocks to the cache so it can build the btree for this segment
    segmentIdentifier.setSegmentToTableBlocksInfos(blocksBySegment);
    segmentIdentifier.setIsSegmentUpdated(segmentUpdated);
    indexWrapper = cacheClient.getSegmentAccessClient().get(segmentIdentifier);
    indexByTask = indexWrapper.getTaskIdToTableSegmentMap();
  }

  if (cachedTaskKeys == null) {
    return indexByTask;
  }

  // updated segment: expose only the entries for the keys collected as valid
  Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> validIndexes =
      new HashMap<>(validTaskKeys.size());
  for (SegmentTaskIndexStore.TaskBucketHolder taskKey : validTaskKeys) {
    validIndexes.put(taskKey, indexByTask.get(taskKey));
  }
  return validIndexes;
}
use of org.apache.carbondata.core.datastore.block.TableBlockInfo in project carbondata by apache.
the class CarbonRecordReader method initialize.
/**
 * Prepares this reader for the given split: converts the split(s) into table block infos,
 * stores them on the query model, initializes the read support and starts query execution.
 *
 * @param inputSplit either a {@code CarbonInputSplit} (single block) or a
 *                   {@code CarbonMultiBlockSplit} (several blocks)
 * @param context    task attempt context; not read by this method
 * @throws IOException          declared by the overridden method and the calls made here
 * @throws InterruptedException if query execution fails with a {@code QueryExecutionException};
 *                              the original exception is attached as the cause
 * @throws RuntimeException     if {@code inputSplit} is of neither supported type
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
    throws IOException, InterruptedException {
  // The input split can contain single HDFS block or multiple blocks, so firstly get all the
  // blocks and then set them in the query model.
  List<CarbonInputSplit> splitList;
  if (inputSplit instanceof CarbonInputSplit) {
    splitList = new ArrayList<>(1);
    splitList.add((CarbonInputSplit) inputSplit);
  } else if (inputSplit instanceof CarbonMultiBlockSplit) {
    // contains multiple blocks, this is an optimization for concurrent query.
    CarbonMultiBlockSplit multiBlockSplit = (CarbonMultiBlockSplit) inputSplit;
    splitList = multiBlockSplit.getAllSplits();
  } else {
    throw new RuntimeException("unsupported input split type: " + inputSplit);
  }
  List<TableBlockInfo> tableBlockInfoList = CarbonInputSplit.createBlocks(splitList);
  queryModel.setTableBlockInfos(tableBlockInfoList);
  readSupport.initialize(queryModel.getProjectionColumns(),
      queryModel.getAbsoluteTableIdentifier());
  try {
    carbonIterator = new ChunkRowIterator(queryExecutor.execute(queryModel));
  } catch (QueryExecutionException e) {
    // InterruptedException has no (message, cause) constructor, so the cause is attached
    // with initCause to keep the original failure and its stack trace visible to callers.
    InterruptedException interrupted = new InterruptedException(e.getMessage());
    interrupted.initCause(e);
    throw interrupted;
  }
}
use of org.apache.carbondata.core.datastore.block.TableBlockInfo in project carbondata by apache.
the class InMemoryBTreeIndex method getTableBlockInfo.
/**
 * Builds the list of table block infos for this index's segment, one entry per input split
 * returned by {@code segment.getAllSplits(job)}.
 *
 * @param job job context used to enumerate the segment's splits
 * @return list with one {@code TableBlockInfo} per split; empty when the segment has no splits
 * @throws IOException declared for the split enumeration and location lookup performed here
 */
private List<TableBlockInfo> getTableBlockInfo(JobContext job) throws IOException {
  List<TableBlockInfo> blocks = new ArrayList<>();
  // every split of the segment is treated as a carbon split and turned into one block entry
  for (InputSplit rawSplit : segment.getAllSplits(job)) {
    CarbonInputSplit split = (CarbonInputSplit) rawSplit;
    // NOTE(review): presumably (count, start, count) selects all blocklets of the block;
    // confirm against BlockletInfos
    BlockletInfos blockletRange =
        new BlockletInfos(split.getNumberOfBlocklets(), 0, split.getNumberOfBlocklets());
    String blockPath = split.getPath().toString();
    TableBlockInfo block = new TableBlockInfo(blockPath, split.getStart(), segment.getId(),
        split.getLocations(), split.getLength(), blockletRange, split.getVersion());
    blocks.add(block);
  }
  return blocks;
}
Aggregations