Search in sources :

Example 6 with PartitionSpec

Usage example of org.apache.carbondata.core.indexstore.PartitionSpec in the Apache CarbonData project.

From class CarbonInputFormat, method getDataBlocksOfSegment.

/**
 * get data blocks of given segment
 */
protected List<CarbonInputSplit> getDataBlocksOfSegment(JobContext job, CarbonTable carbonTable, FilterResolverIntf resolver, BitSet matchedPartitions, List<Segment> segmentIds, PartitionInfo partitionInfo, List<Integer> oldPartitionIdList) throws IOException {
    QueryStatisticsRecorder recorder = CarbonTimeStatisticsFactory.createDriverRecorder();
    QueryStatistic statistic = new QueryStatistic();
    // get tokens for all the required FileSystem for table path
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { new Path(carbonTable.getTablePath()) }, job.getConfiguration());
    boolean distributedCG = Boolean.parseBoolean(CarbonProperties.getInstance().getProperty(CarbonCommonConstants.USE_DISTRIBUTED_DATAMAP, CarbonCommonConstants.USE_DISTRIBUTED_DATAMAP_DEFAULT));
    DataMapExprWrapper dataMapExprWrapper = DataMapChooser.get().choose(getOrCreateCarbonTable(job.getConfiguration()), resolver);
    DataMapJob dataMapJob = getDataMapJob(job.getConfiguration());
    List<PartitionSpec> partitionsToPrune = getPartitionsToPrune(job.getConfiguration());
    List<ExtendedBlocklet> prunedBlocklets;
    if (distributedCG || dataMapExprWrapper.getDataMapType() == DataMapLevel.FG) {
        DistributableDataMapFormat datamapDstr = new DistributableDataMapFormat(carbonTable, dataMapExprWrapper, segmentIds, partitionsToPrune, BlockletDataMapFactory.class.getName());
        prunedBlocklets = dataMapJob.execute(datamapDstr, resolver);
        // Apply expression on the blocklets.
        prunedBlocklets = dataMapExprWrapper.pruneBlocklets(prunedBlocklets);
    } else {
        prunedBlocklets = dataMapExprWrapper.prune(segmentIds, partitionsToPrune);
    }
    List<CarbonInputSplit> resultFilterredBlocks = new ArrayList<>();
    int partitionIndex = 0;
    List<Integer> partitionIdList = new ArrayList<>();
    if (partitionInfo != null && partitionInfo.getPartitionType() != PartitionType.NATIVE_HIVE) {
        partitionIdList = partitionInfo.getPartitionIds();
    }
    for (ExtendedBlocklet blocklet : prunedBlocklets) {
        long partitionId = CarbonTablePath.DataFileUtil.getTaskIdFromTaskNo(CarbonTablePath.DataFileUtil.getTaskNo(blocklet.getPath()));
        // For other normal query should use newest partitionIdList
        if (partitionInfo != null && partitionInfo.getPartitionType() != PartitionType.NATIVE_HIVE) {
            if (oldPartitionIdList != null) {
                partitionIndex = oldPartitionIdList.indexOf((int) partitionId);
            } else {
                partitionIndex = partitionIdList.indexOf((int) partitionId);
            }
        }
        if (partitionIndex != -1) {
            // if this partition is not required, here will skip it.
            if (matchedPartitions == null || matchedPartitions.get(partitionIndex)) {
                CarbonInputSplit inputSplit = convertToCarbonInputSplit(blocklet);
                if (inputSplit != null) {
                    resultFilterredBlocks.add(inputSplit);
                }
            }
        }
    }
    statistic.addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_DRIVER, System.currentTimeMillis());
    recorder.recordStatisticsForDriver(statistic, job.getConfiguration().get("query.id"));
    return resultFilterredBlocks;
}
Also used : Path(org.apache.hadoop.fs.Path) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) ArrayList(java.util.ArrayList) BlockletDataMapFactory(org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMapFactory) DataMapExprWrapper(org.apache.carbondata.core.datamap.dev.expr.DataMapExprWrapper) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit) PartitionSpec(org.apache.carbondata.core.indexstore.PartitionSpec) QueryStatisticsRecorder(org.apache.carbondata.core.stats.QueryStatisticsRecorder) ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet) QueryStatistic(org.apache.carbondata.core.stats.QueryStatistic)

Aggregations

PartitionSpec (org.apache.carbondata.core.indexstore.PartitionSpec)6 ArrayList (java.util.ArrayList)3 CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath)3 Path (org.apache.hadoop.fs.Path)3 HashMap (java.util.HashMap)1 Map (java.util.Map)1 Segment (org.apache.carbondata.core.datamap.Segment)1 DataMapExprWrapper (org.apache.carbondata.core.datamap.dev.expr.DataMapExprWrapper)1 CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)1 CarbonFileFilter (org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter)1 ExtendedBlocklet (org.apache.carbondata.core.indexstore.ExtendedBlocklet)1 BlockletDataMapFactory (org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMapFactory)1 SegmentIndexFileStore (org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore)1 SegmentFileStore (org.apache.carbondata.core.metadata.SegmentFileStore)1 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)1 QueryStatistic (org.apache.carbondata.core.stats.QueryStatistic)1 QueryStatisticsRecorder (org.apache.carbondata.core.stats.QueryStatisticsRecorder)1 LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails)1 SegmentStatusManager (org.apache.carbondata.core.statusmanager.SegmentStatusManager)1 CarbonInputSplit (org.apache.carbondata.hadoop.CarbonInputSplit)1