Usage of org.apache.carbondata.hadoop.util.BlockLevelTraverser in the Apache CarbonData project:
the getBlockRowCount method of the CarbonInputFormat class.
/**
 * Builds the row count per block and the block count per valid segment for the table.
 *
 * @param job the job context used when loading segment index data
 * @param absoluteTableIdentifier identifies the table (and its store path) to scan
 * @return a {@link BlockMappingVO} holding the block-to-row-count map and the
 *         segment-to-block-count map
 * @throws IOException if segment status or index information cannot be read
 * @throws KeyGenException if key generation fails while traversing block indexes
 */
public BlockMappingVO getBlockRowCount(JobContext job, AbsoluteTableIdentifier absoluteTableIdentifier) throws IOException, KeyGenException {
  CacheClient cacheClient = new CacheClient(absoluteTableIdentifier.getStorePath());
  try {
    SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(absoluteTableIdentifier);
    SegmentStatusManager.ValidAndInvalidSegmentsInfo segmentsInfo =
        new SegmentStatusManager(absoluteTableIdentifier).getValidAndInvalidSegments();
    Map<String, Long> rowCountByBlock = new HashMap<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    Map<String, Long> blockCountBySegment = new HashMap<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    // Only valid segments contribute to the counts; invalid ones are skipped entirely.
    for (String segmentId : segmentsInfo.getValidSegments()) {
      Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> indexByTask =
          getSegmentAbstractIndexs(job, absoluteTableIdentifier, segmentId, cacheClient, updateStatusManager);
      long blocksInSegment = 0;
      // Each traversal fills rowCountByBlock as a side effect and reports how
      // many blocks it visited; sum those to get the segment's block count.
      for (AbstractIndex taskIndex : indexByTask.values()) {
        blocksInSegment += new BlockLevelTraverser()
            .getBlockRowMapping(taskIndex, rowCountByBlock, segmentId, updateStatusManager);
      }
      blockCountBySegment.put(segmentId, blocksInSegment);
    }
    return new BlockMappingVO(rowCountByBlock, blockCountBySegment);
  } finally {
    // CacheClient is opened unconditionally above, so always release it.
    cacheClient.close();
  }
}
Aggregations