Search in sources :

Example 6 with ExtendedBlocklet

use of org.apache.carbondata.core.indexstore.ExtendedBlocklet in project carbondata by apache.

the class BlockletDataMapFactory method getExtendedBlocklets.

/**
 * Get the blocklet detail information based on blockletid, blockid and segmentid. This method is
 * exclusively for BlockletDataMapFactory as detail information is only available in this
 * default datamap.
 *
 * @param blocklets blocklets to resolve; must not be null, may be empty
 * @param segment   segment whose index identifiers are used to look up the blocklet details
 * @return a new mutable list holding one {@link ExtendedBlocklet} per input blocklet, in input
 *         order; empty when {@code blocklets} is empty
 * @throws IOException if the identifier lookup or the detail lookup fails
 */
@Override
public List<ExtendedBlocklet> getExtendedBlocklets(List<Blocklet> blocklets, Segment segment) throws IOException {
    // The result always has exactly one entry per input blocklet, so size it up front.
    List<ExtendedBlocklet> detailedBlocklets = new ArrayList<>(blocklets.size());
    // Nothing to resolve: return before looking up the segment's index identifiers,
    // which would otherwise be fetched and never used.
    if (blocklets.isEmpty()) {
        return detailedBlocklets;
    }
    // If it is already detailed blocklet then type cast and return same.
    // Only the first element is inspected; the rest are assumed to be of the same type.
    if (blocklets.get(0) instanceof ExtendedBlocklet) {
        for (Blocklet blocklet : blocklets) {
            detailedBlocklets.add((ExtendedBlocklet) blocklet);
        }
        return detailedBlocklets;
    }
    List<TableBlockIndexUniqueIdentifier> identifiers = getTableBlockIndexUniqueIdentifiers(segment);
    // Retrieve each blocklets detail information from blocklet datamap
    for (Blocklet blocklet : blocklets) {
        detailedBlocklets.add(getExtendedBlocklet(identifiers, blocklet));
    }
    return detailedBlocklets;
}
Also used : ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet) Blocklet(org.apache.carbondata.core.indexstore.Blocklet) ArrayList(java.util.ArrayList) TableBlockIndexUniqueIdentifier(org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifier) ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet)

Example 7 with ExtendedBlocklet

use of org.apache.carbondata.core.indexstore.ExtendedBlocklet in project carbondata by apache.

the class CarbonInputFormat method getDataBlocksOfSegment.

/**
 * Prunes the given segments with the datamap chosen for {@code resolver} and converts the
 * surviving blocklets into input splits.
 *
 * <p>Pruning runs through the datamap job when distributed pruning is enabled or the chosen
 * datamap is fine grained; otherwise it runs directly on the expression wrapper. For tables
 * whose partition type is not NATIVE_HIVE, a blocklet is kept only if its partition id is found
 * in the id list ({@code oldPartitionIdList} when given, else the table's current ids) and, when
 * {@code matchedPartitions} is non-null, the bit at that index is set.
 *
 * @return the splits for all blocklets that passed pruning and partition matching
 * @throws IOException if token retrieval, pruning or split conversion fails
 */
protected List<CarbonInputSplit> getDataBlocksOfSegment(JobContext job, CarbonTable carbonTable, FilterResolverIntf resolver, BitSet matchedPartitions, List<Segment> segmentIds, PartitionInfo partitionInfo, List<Integer> oldPartitionIdList) throws IOException {
    QueryStatisticsRecorder driverRecorder = CarbonTimeStatisticsFactory.createDriverRecorder();
    QueryStatistic loadBlocksStatistic = new QueryStatistic();

    // get tokens for all the required FileSystem for table path
    Path[] tablePaths = new Path[] { new Path(carbonTable.getTablePath()) };
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), tablePaths, job.getConfiguration());

    String distributedFlag = CarbonProperties.getInstance().getProperty(CarbonCommonConstants.USE_DISTRIBUTED_DATAMAP, CarbonCommonConstants.USE_DISTRIBUTED_DATAMAP_DEFAULT);
    boolean distributedCG = Boolean.parseBoolean(distributedFlag);
    DataMapExprWrapper dataMapExprWrapper = DataMapChooser.get().choose(getOrCreateCarbonTable(job.getConfiguration()), resolver);
    DataMapJob dataMapJob = getDataMapJob(job.getConfiguration());
    List<PartitionSpec> partitionsToPrune = getPartitionsToPrune(job.getConfiguration());

    List<ExtendedBlocklet> prunedBlocklets;
    boolean pruneThroughJob = distributedCG || dataMapExprWrapper.getDataMapType() == DataMapLevel.FG;
    if (!pruneThroughJob) {
        prunedBlocklets = dataMapExprWrapper.prune(segmentIds, partitionsToPrune);
    } else {
        DistributableDataMapFormat distributableFormat = new DistributableDataMapFormat(carbonTable, dataMapExprWrapper, segmentIds, partitionsToPrune, BlockletDataMapFactory.class.getName());
        List<ExtendedBlocklet> jobResult = dataMapJob.execute(distributableFormat, resolver);
        // Apply expression on the blocklets.
        prunedBlocklets = dataMapExprWrapper.pruneBlocklets(jobResult);
    }

    // Partition ids only matter for tables that are partitioned in a non NATIVE_HIVE way.
    boolean usesPartitionIds = partitionInfo != null && partitionInfo.getPartitionType() != PartitionType.NATIVE_HIVE;
    List<Integer> partitionIdList = usesPartitionIds ? partitionInfo.getPartitionIds() : new ArrayList<Integer>();
    // An explicitly supplied old id list wins; normal queries use the newest partitionIdList.
    List<Integer> lookupIds = oldPartitionIdList != null ? oldPartitionIdList : partitionIdList;

    List<CarbonInputSplit> splits = new ArrayList<>();
    // Stays 0 for the whole loop when partition ids are not in use.
    int partitionIndex = 0;
    for (ExtendedBlocklet blocklet : prunedBlocklets) {
        long partitionId = CarbonTablePath.DataFileUtil.getTaskIdFromTaskNo(CarbonTablePath.DataFileUtil.getTaskNo(blocklet.getPath()));
        if (usesPartitionIds) {
            partitionIndex = lookupIds.indexOf((int) partitionId);
        }
        if (partitionIndex == -1) {
            continue;
        }
        // if this partition is not required, here will skip it.
        if (matchedPartitions != null && !matchedPartitions.get(partitionIndex)) {
            continue;
        }
        CarbonInputSplit split = convertToCarbonInputSplit(blocklet);
        if (split != null) {
            splits.add(split);
        }
    }

    loadBlocksStatistic.addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_DRIVER, System.currentTimeMillis());
    driverRecorder.recordStatisticsForDriver(loadBlocksStatistic, job.getConfiguration().get("query.id"));
    return splits;
}
Also used : Path(org.apache.hadoop.fs.Path) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) ArrayList(java.util.ArrayList) BlockletDataMapFactory(org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMapFactory) DataMapExprWrapper(org.apache.carbondata.core.datamap.dev.expr.DataMapExprWrapper) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit) PartitionSpec(org.apache.carbondata.core.indexstore.PartitionSpec) QueryStatisticsRecorder(org.apache.carbondata.core.stats.QueryStatisticsRecorder) ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet) QueryStatistic(org.apache.carbondata.core.stats.QueryStatistic)

Example 8 with ExtendedBlocklet

use of org.apache.carbondata.core.indexstore.ExtendedBlocklet in project carbondata by apache.

the class CarbonTableInputFormat method getBlockRowCount.

/**
 * Computes, for the valid segments of {@code table}, the row count per block and the number of
 * counted blocks per segment.
 *
 * <p>Blocklets are obtained by pruning the default datamap without a filter. A blocklet is
 * skipped when update details exist for its block and mark the block as invalid.
 *
 * @param job        job used to filter the valid segments
 * @param table      table whose blocks are counted
 * @param partitions partitions passed to the datamap prune call
 * @return a {@link BlockMappingVO} built from the block-key to row-count map and the
 *         segment-id to block-count map
 * @throws IOException if reading the table status or pruning fails
 */
public BlockMappingVO getBlockRowCount(Job job, CarbonTable table, List<PartitionSpec> partitions) throws IOException {
    AbsoluteTableIdentifier identifier = table.getAbsoluteTableIdentifier();
    TableDataMap blockletMap = DataMapStoreManager.getInstance().getDefaultDataMap(table);
    LoadMetadataDetails[] loadMetadataDetails = SegmentStatusManager.readTableStatusFile(CarbonTablePath.getTableStatusFilePath(identifier.getTablePath()));
    SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(table, loadMetadataDetails);
    SegmentStatusManager.ValidAndInvalidSegmentsInfo allSegments = new SegmentStatusManager(identifier).getValidAndInvalidSegments(loadMetadataDetails);

    Map<String, Long> rowsPerBlock = new HashMap<>();
    Map<String, Long> blocksPerSegment = new HashMap<>();

    // TODO: currently only batch segment is supported, add support for streaming table
    List<Segment> filteredSegment = getFilteredSegment(job, allSegments.getValidSegments(), false);
    List<ExtendedBlocklet> prunedBlocklets = blockletMap.prune(filteredSegment, null, partitions);

    for (ExtendedBlocklet blocklet : prunedBlocklets) {
        String blockFileName = CarbonTablePath.getCarbonDataFileName(blocklet.getPath()) + CarbonTablePath.getCarbonDataExtension();
        long blockletRows = blocklet.getDetailInfo().getRowCount();
        String segmentId = blocklet.getSegmentId();
        String blockKey = CarbonUpdateUtil.getSegmentBlockNameKey(segmentId, blockFileName);

        // if block is invalid then dont add the count
        SegmentUpdateDetails updateDetails = updateStatusManager.getDetailsForABlock(blockKey);
        if (updateDetails != null && CarbonUpdateUtil.isBlockInvalid(updateDetails.getSegmentStatus())) {
            continue;
        }

        Long rowsSoFar = rowsPerBlock.get(blockKey);
        if (rowsSoFar == null) {
            // First blocklet seen for this block: count the block against its segment.
            Long blocksSoFar = segmentAndBlockCount(blocksPerSegment, segmentId);
            blocksPerSegment.put(segmentId, blocksSoFar + 1);
            rowsSoFar = 0L;
        }
        rowsPerBlock.put(blockKey, rowsSoFar + blockletRows);
    }
    return new BlockMappingVO(rowsPerBlock, blocksPerSegment);
}

/** Returns the block count recorded for {@code segmentId}, or zero when none is recorded yet. */
private static Long segmentAndBlockCount(Map<String, Long> blocksPerSegment, String segmentId) {
    Long recorded = blocksPerSegment.get(segmentId);
    return recorded == null ? Long.valueOf(0L) : recorded;
}
Also used : BlockMappingVO(org.apache.carbondata.core.mutate.data.BlockMappingVO) SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) HashMap(java.util.HashMap) TableDataMap(org.apache.carbondata.core.datamap.TableDataMap) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) Segment(org.apache.carbondata.core.datamap.Segment) SegmentUpdateDetails(org.apache.carbondata.core.mutate.SegmentUpdateDetails) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet)

Example 9 with ExtendedBlocklet

use of org.apache.carbondata.core.indexstore.ExtendedBlocklet in project carbondata by apache.

the class DistributableDataMapFormat method createRecordReader.

/**
 * Creates a reader that prunes the datamap for one distributable split and hands out the
 * resulting blocklets one at a time. Keys are always {@code null}; each value is an
 * {@link ExtendedBlocklet} tagged with the unique id of the split's distributable wrapper.
 */
@Override
public RecordReader<Void, ExtendedBlocklet> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
    return new RecordReader<Void, ExtendedBlocklet>() {

        // Blocklets still to be handed out; set by initialize().
        private Iterator<ExtendedBlocklet> remaining;

        // Blocklet returned by the most recent successful nextKeyValue().
        private ExtendedBlocklet current;

        @Override
        public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
            DataMapDistributableWrapper wrapper = (DataMapDistributableWrapper) split;
            String uniqueId = wrapper.getUniqueId();
            TableDataMap dataMap = DataMapStoreManager.getInstance().getDataMap(table, wrapper.getDistributable().getDataMapSchema());
            List<ExtendedBlocklet> pruned = dataMap.prune(wrapper.getDistributable(), dataMapExprWrapper.getFilterResolverIntf(uniqueId), partitions);
            // Tag every blocklet with the id of the wrapper it was pruned for.
            for (ExtendedBlocklet prunedBlocklet : pruned) {
                prunedBlocklet.setDataMapUniqueId(uniqueId);
            }
            remaining = pruned.iterator();
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (!remaining.hasNext()) {
                // Exhausted: the last handed-out blocklet stays as the current value.
                return false;
            }
            current = remaining.next();
            return true;
        }

        @Override
        public Void getCurrentKey() throws IOException, InterruptedException {
            // This reader produces values only.
            return null;
        }

        @Override
        public ExtendedBlocklet getCurrentValue() throws IOException, InterruptedException {
            return current;
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            // Progress is not tracked.
            return 0;
        }

        @Override
        public void close() throws IOException {
            // Nothing to release.
        }
    };
}
Also used : DataMapDistributableWrapper(org.apache.carbondata.core.datamap.dev.expr.DataMapDistributableWrapper) TableDataMap(org.apache.carbondata.core.datamap.TableDataMap) RecordReader(org.apache.hadoop.mapreduce.RecordReader) Iterator(java.util.Iterator) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) InputSplit(org.apache.hadoop.mapreduce.InputSplit) ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet)

Aggregations

ExtendedBlocklet (org.apache.carbondata.core.indexstore.ExtendedBlocklet)9 ArrayList (java.util.ArrayList)6 Blocklet (org.apache.carbondata.core.indexstore.Blocklet)3 TableDataMap (org.apache.carbondata.core.datamap.TableDataMap)2 DataMap (org.apache.carbondata.core.datamap.dev.DataMap)2 FineGrainBlocklet (org.apache.carbondata.core.datamap.dev.fgdatamap.FineGrainBlocklet)2 ByteArrayInputStream (java.io.ByteArrayInputStream)1 DataInputStream (java.io.DataInputStream)1 IOException (java.io.IOException)1 HashMap (java.util.HashMap)1 Iterator (java.util.Iterator)1 Segment (org.apache.carbondata.core.datamap.Segment)1 BlockletSerializer (org.apache.carbondata.core.datamap.dev.BlockletSerializer)1 DataMapDistributableWrapper (org.apache.carbondata.core.datamap.dev.expr.DataMapDistributableWrapper)1 DataMapExprWrapper (org.apache.carbondata.core.datamap.dev.expr.DataMapExprWrapper)1 SegmentProperties (org.apache.carbondata.core.datastore.block.SegmentProperties)1 BlockletDetailInfo (org.apache.carbondata.core.indexstore.BlockletDetailInfo)1 PartitionSpec (org.apache.carbondata.core.indexstore.PartitionSpec)1 TableBlockIndexUniqueIdentifier (org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifier)1 BlockletDataMapFactory (org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMapFactory)1