Use of org.apache.carbondata.core.indexstore.ExtendedBlocklet in project carbondata by apache.
The class BlockletDataMapFactory, method getExtendedBlocklets.
/**
 * Get the blocklet detail information based on blockletid, blockid and segmentid. This method is
 * exclusively for BlockletDataMapFactory as detail information is only available in this
 * default datamap.
 */
@Override
public List<ExtendedBlocklet> getExtendedBlocklets(List<Blocklet> blocklets, Segment segment)
    throws IOException {
  List<ExtendedBlocklet> detailedBlocklets = new ArrayList<>();
  // If these are already detailed blocklets, just cast and return them
  if (blocklets.size() > 0 && blocklets.get(0) instanceof ExtendedBlocklet) {
    for (Blocklet blocklet : blocklets) {
      detailedBlocklets.add((ExtendedBlocklet) blocklet);
    }
    return detailedBlocklets;
  }
  List<TableBlockIndexUniqueIdentifier> identifiers = getTableBlockIndexUniqueIdentifiers(segment);
  // Retrieve each blocklet's detail information from the blocklet datamap
  for (Blocklet blocklet : blocklets) {
    detailedBlocklets.add(getExtendedBlocklet(identifiers, blocklet));
  }
  return detailedBlocklets;
}
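The method boils down to a "cast through if already detailed, otherwise resolve each entry" pattern. Below is a minimal, self-contained plain-Java sketch of that control flow; the Blocklet/ExtendedBlocklet classes here are stand-ins, not the CarbonData types, so the sketch runs without a CarbonData table.

import java.util.ArrayList;
import java.util.List;

public class ExtendedLookupSketch {
  // Stand-in types: ExtendedBlocklet extends Blocklet and carries extra detail info.
  static class Blocklet {
    final String id;
    Blocklet(String id) { this.id = id; }
  }
  static class ExtendedBlocklet extends Blocklet {
    final long rowCount;
    ExtendedBlocklet(String id, long rowCount) { super(id); this.rowCount = rowCount; }
  }

  // Same shape as getExtendedBlocklets: cast through if the input is already
  // detailed, otherwise resolve each plain blocklet into a detailed one.
  static List<ExtendedBlocklet> toExtended(List<Blocklet> blocklets) {
    List<ExtendedBlocklet> detailed = new ArrayList<>();
    if (!blocklets.isEmpty() && blocklets.get(0) instanceof ExtendedBlocklet) {
      for (Blocklet b : blocklets) {
        detailed.add((ExtendedBlocklet) b);
      }
      return detailed;
    }
    for (Blocklet b : blocklets) {
      // Stand-in for getExtendedBlocklet(identifiers, blocklet): pretend lookup.
      detailed.add(new ExtendedBlocklet(b.id, 0L));
    }
    return detailed;
  }

  public static void main(String[] args) {
    List<Blocklet> input = new ArrayList<>();
    input.add(new Blocklet("blocklet-0"));
    input.add(new Blocklet("blocklet-1"));
    System.out.println(toExtended(input).size()); // prints 2
  }
}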
Use of org.apache.carbondata.core.indexstore.ExtendedBlocklet in project carbondata by apache.
The class CarbonInputFormat, method getDataBlocksOfSegment.
/**
 * Get the data blocks of the given segments.
 */
protected List<CarbonInputSplit> getDataBlocksOfSegment(JobContext job, CarbonTable carbonTable,
    FilterResolverIntf resolver, BitSet matchedPartitions, List<Segment> segmentIds,
    PartitionInfo partitionInfo, List<Integer> oldPartitionIdList) throws IOException {
  QueryStatisticsRecorder recorder = CarbonTimeStatisticsFactory.createDriverRecorder();
  QueryStatistic statistic = new QueryStatistic();
  // get tokens for all the required FileSystems for the table path
  TokenCache.obtainTokensForNamenodes(job.getCredentials(),
      new Path[] { new Path(carbonTable.getTablePath()) }, job.getConfiguration());
  boolean distributedCG = Boolean.parseBoolean(CarbonProperties.getInstance().getProperty(
      CarbonCommonConstants.USE_DISTRIBUTED_DATAMAP,
      CarbonCommonConstants.USE_DISTRIBUTED_DATAMAP_DEFAULT));
  DataMapExprWrapper dataMapExprWrapper =
      DataMapChooser.get().choose(getOrCreateCarbonTable(job.getConfiguration()), resolver);
  DataMapJob dataMapJob = getDataMapJob(job.getConfiguration());
  List<PartitionSpec> partitionsToPrune = getPartitionsToPrune(job.getConfiguration());
  List<ExtendedBlocklet> prunedBlocklets;
  if (distributedCG || dataMapExprWrapper.getDataMapType() == DataMapLevel.FG) {
    DistributableDataMapFormat datamapDstr = new DistributableDataMapFormat(carbonTable,
        dataMapExprWrapper, segmentIds, partitionsToPrune,
        BlockletDataMapFactory.class.getName());
    prunedBlocklets = dataMapJob.execute(datamapDstr, resolver);
    // Apply the filter expression on the blocklets.
    prunedBlocklets = dataMapExprWrapper.pruneBlocklets(prunedBlocklets);
  } else {
    prunedBlocklets = dataMapExprWrapper.prune(segmentIds, partitionsToPrune);
  }
  List<CarbonInputSplit> resultFilterredBlocks = new ArrayList<>();
  int partitionIndex = 0;
  List<Integer> partitionIdList = new ArrayList<>();
  if (partitionInfo != null && partitionInfo.getPartitionType() != PartitionType.NATIVE_HIVE) {
    partitionIdList = partitionInfo.getPartitionIds();
  }
  for (ExtendedBlocklet blocklet : prunedBlocklets) {
    long partitionId = CarbonTablePath.DataFileUtil.getTaskIdFromTaskNo(
        CarbonTablePath.DataFileUtil.getTaskNo(blocklet.getPath()));
    // A normal query should use the newest partitionIdList; the old list is used
    // only when oldPartitionIdList is supplied
    if (partitionInfo != null && partitionInfo.getPartitionType() != PartitionType.NATIVE_HIVE) {
      if (oldPartitionIdList != null) {
        partitionIndex = oldPartitionIdList.indexOf((int) partitionId);
      } else {
        partitionIndex = partitionIdList.indexOf((int) partitionId);
      }
    }
    if (partitionIndex != -1) {
      // skip this partition if it is not required
      if (matchedPartitions == null || matchedPartitions.get(partitionIndex)) {
        CarbonInputSplit inputSplit = convertToCarbonInputSplit(blocklet);
        if (inputSplit != null) {
          resultFilterredBlocks.add(inputSplit);
        }
      }
    }
  }
  statistic.addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_DRIVER, System.currentTimeMillis());
  recorder.recordStatisticsForDriver(statistic, job.getConfiguration().get("query.id"));
  return resultFilterredBlocks;
}
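The final loop keeps a pruned blocklet only when its partition index is set in the matchedPartitions BitSet (a null BitSet means no partition pruning, so everything passes). The following sketch isolates just that filtering step; the names are hypothetical and there are no CarbonData dependencies.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.List;

public class PartitionFilterSketch {
  // Keep only the items whose partition index is set in matchedPartitions;
  // a null BitSet means "no partition pruning", so everything passes.
  static List<Integer> filterByPartition(List<Integer> partitionIndexes, BitSet matchedPartitions) {
    List<Integer> kept = new ArrayList<>();
    for (int partitionIndex : partitionIndexes) {
      if (partitionIndex != -1
          && (matchedPartitions == null || matchedPartitions.get(partitionIndex))) {
        kept.add(partitionIndex);
      }
    }
    return kept;
  }

  public static void main(String[] args) {
    BitSet matched = new BitSet();
    matched.set(0);
    matched.set(2);
    List<Integer> indexes = Arrays.asList(0, 1, 2, -1);
    System.out.println(filterByPartition(indexes, matched)); // [0, 2]
  }
}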
Use of org.apache.carbondata.core.indexstore.ExtendedBlocklet in project carbondata by apache.
The class CarbonTableInputFormat, method getBlockRowCount.
/**
 * Get the row count of each block, along with the mapping of segment to block count.
 */
public BlockMappingVO getBlockRowCount(Job job, CarbonTable table, List<PartitionSpec> partitions)
    throws IOException {
  AbsoluteTableIdentifier identifier = table.getAbsoluteTableIdentifier();
  TableDataMap blockletMap = DataMapStoreManager.getInstance().getDefaultDataMap(table);
  LoadMetadataDetails[] loadMetadataDetails = SegmentStatusManager.readTableStatusFile(
      CarbonTablePath.getTableStatusFilePath(identifier.getTablePath()));
  SegmentUpdateStatusManager updateStatusManager =
      new SegmentUpdateStatusManager(table, loadMetadataDetails);
  SegmentStatusManager.ValidAndInvalidSegmentsInfo allSegments =
      new SegmentStatusManager(identifier).getValidAndInvalidSegments(loadMetadataDetails);
  Map<String, Long> blockRowCountMapping = new HashMap<>();
  Map<String, Long> segmentAndBlockCountMapping = new HashMap<>();
  // TODO: currently only batch segments are supported, add support for streaming tables
  List<Segment> filteredSegment = getFilteredSegment(job, allSegments.getValidSegments(), false);
  List<ExtendedBlocklet> blocklets = blockletMap.prune(filteredSegment, null, partitions);
  for (ExtendedBlocklet blocklet : blocklets) {
    String blockName = blocklet.getPath();
    blockName = CarbonTablePath.getCarbonDataFileName(blockName);
    blockName = blockName + CarbonTablePath.getCarbonDataExtension();
    long rowCount = blocklet.getDetailInfo().getRowCount();
    String key = CarbonUpdateUtil.getSegmentBlockNameKey(blocklet.getSegmentId(), blockName);
    // if the block is invalid then don't add its count
    SegmentUpdateDetails details = updateStatusManager.getDetailsForABlock(key);
    if (null == details || !CarbonUpdateUtil.isBlockInvalid(details.getSegmentStatus())) {
      Long blockCount = blockRowCountMapping.get(key);
      if (blockCount == null) {
        blockCount = 0L;
        Long count = segmentAndBlockCountMapping.get(blocklet.getSegmentId());
        if (count == null) {
          count = 0L;
        }
        segmentAndBlockCountMapping.put(blocklet.getSegmentId(), count + 1);
      }
      blockCount += rowCount;
      blockRowCountMapping.put(key, blockCount);
    }
  }
  return new BlockMappingVO(blockRowCountMapping, segmentAndBlockCountMapping);
}
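The loop aggregates with two maps: it sums row counts per block key and, the first time a block key is seen, increments the block count of that block's segment. Here is a self-contained sketch of that two-map aggregation, using made-up segment and block names in place of pruned blocklets.

import java.util.HashMap;
import java.util.Map;

public class RowCountAggregationSketch {
  // Same aggregation shape as getBlockRowCount: sum row counts per block key and,
  // the first time a block key is seen, bump the block count of its segment.
  public static void main(String[] args) {
    // (segmentId, blockName, rowCount) triples standing in for pruned blocklets.
    String[][] blocklets = {
        {"0", "part-0-0.carbondata", "100"},
        {"0", "part-0-0.carbondata", "50"},
        {"0", "part-0-1.carbondata", "20"},
        {"1", "part-1-0.carbondata", "70"},
    };
    Map<String, Long> blockRowCountMapping = new HashMap<>();
    Map<String, Long> segmentAndBlockCountMapping = new HashMap<>();
    for (String[] b : blocklets) {
      String segmentId = b[0];
      String key = segmentId + "/" + b[1]; // stand-in for getSegmentBlockNameKey
      long rowCount = Long.parseLong(b[2]);
      Long blockCount = blockRowCountMapping.get(key);
      if (blockCount == null) {
        blockCount = 0L;
        segmentAndBlockCountMapping.merge(segmentId, 1L, Long::sum);
      }
      blockRowCountMapping.put(key, blockCount + rowCount);
    }
    System.out.println(blockRowCountMapping);        // per-block row counts
    System.out.println(segmentAndBlockCountMapping); // blocks per segment
  }
}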
Use of org.apache.carbondata.core.indexstore.ExtendedBlocklet in project carbondata by apache.
The class DistributableDataMapFormat, method createRecordReader.
@Override
public RecordReader<Void, ExtendedBlocklet> createRecordReader(InputSplit inputSplit,
    TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
  return new RecordReader<Void, ExtendedBlocklet>() {

    private Iterator<ExtendedBlocklet> blockletIterator;

    private ExtendedBlocklet currBlocklet;

    @Override
    public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
      DataMapDistributableWrapper distributable = (DataMapDistributableWrapper) inputSplit;
      TableDataMap dataMap = DataMapStoreManager.getInstance()
          .getDataMap(table, distributable.getDistributable().getDataMapSchema());
      // Prune the blocklets covered by this distributable unit and tag each one
      // with the unique id of the datamap that produced it
      List<ExtendedBlocklet> blocklets = dataMap.prune(distributable.getDistributable(),
          dataMapExprWrapper.getFilterResolverIntf(distributable.getUniqueId()), partitions);
      for (ExtendedBlocklet blocklet : blocklets) {
        blocklet.setDataMapUniqueId(distributable.getUniqueId());
      }
      blockletIterator = blocklets.iterator();
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      boolean hasNext = blockletIterator.hasNext();
      if (hasNext) {
        currBlocklet = blockletIterator.next();
      }
      return hasNext;
    }

    @Override
    public Void getCurrentKey() throws IOException, InterruptedException {
      return null;
    }

    @Override
    public ExtendedBlocklet getCurrentValue() throws IOException, InterruptedException {
      return currBlocklet;
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
      return 0;
    }

    @Override
    public void close() throws IOException {
    }
  };
}