Search in sources :

Example 1 with IndexExprWrapper

use of org.apache.carbondata.core.index.dev.expr.IndexExprWrapper in project carbondata by apache.

the class CarbonInputFormat method getPrunedBlocklets.

/**
 * Prune the blocklets using the filter expression with the available indexes.
 * Blocklets are first pruned with the default blocklet index, and the result is then
 * narrowed further with the CG index and the FG index when those are chosen for the filter.
 * <p>
 * When distributed pruning is enabled for the table and this code is not already running in
 * the index server context, the pruning is delegated to {@code getDistributedSplit} and no
 * CG/FG pruning is done here. If that call fails and fallback is not disabled, the failure is
 * logged and pruning is done with the default index only.
 *
 * @param job job context whose configuration drives index job lookup, partition pruning and
 *            the index-server/FG/SI switches
 * @param carbonTable table to prune
 * @param filter filter to prune with; when {@code null},
 *               {@code new IndexFilter(carbonTable, null)} is used instead
 * @param validSegments segments to prune; also handed to {@code IndexUtil.pruneSegments}
 *                      (NOTE(review): that call presumably narrows this list in place - confirm)
 * @param invalidSegments invalid segments, forwarded to the distributed/index job calls
 * @param segmentsToBeRefreshed segment ids forwarded to the distributed pruning call
 * @return the blocklets that survived every pruning stage that was applied
 * @throws IOException if pruning fails
 */
public List<ExtendedBlocklet> getPrunedBlocklets(JobContext job, CarbonTable carbonTable, IndexFilter filter, List<Segment> validSegments, List<Segment> invalidSegments, List<String> segmentsToBeRefreshed) throws IOException {
    ExplainCollector.addPruningInfo(carbonTable.getTableName());
    filter = filter == null ? new IndexFilter(carbonTable, null) : filter;
    ExplainCollector.setFilterStatement(filter.getExpression() == null ? "none" : filter.getExpression().getStatement());
    boolean distributedCG = Boolean.parseBoolean(CarbonProperties.getInstance().getProperty(CarbonCommonConstants.USE_DISTRIBUTED_INDEX, CarbonCommonConstants.USE_DISTRIBUTED_INDEX_DEFAULT));
    IndexJob indexJob = IndexUtil.getIndexJob(job.getConfiguration());
    List<PartitionSpec> partitionsToPrune = getPartitionsToPrune(job.getConfiguration());
    // First prune using default index on driver side.
    TableIndex defaultIndex = IndexStoreManager.getInstance().getDefaultIndex(carbonTable);
    List<ExtendedBlocklet> prunedBlocklets;
    // This is to log the event, so user will know what is happening by seeing logs.
    LOG.info("Started block pruning ...");
    boolean isDistributedPruningEnabled = CarbonProperties.getInstance().isDistributedPruningEnabled(carbonTable.getDatabaseName(), carbonTable.getTableName());
    boolean isIndexServerContext = job.getConfiguration().get("isIndexServerContext", "false").equals("true");
    if (isDistributedPruningEnabled && !isIndexServerContext) {
        try {
            prunedBlocklets = getDistributedSplit(carbonTable, filter.getResolver(), partitionsToPrune, validSegments, invalidSegments, segmentsToBeRefreshed, false, job.getConfiguration(), filter.getMissingSISegments());
        } catch (Exception e) {
            // If fallback is disabled, propagate the failure as is; otherwise fall back to
            // pruning with the default index.
            if (CarbonProperties.getInstance().isFallBackDisabled()) {
                throw e;
            }
            // Do not swallow the cause silently: report it the same way the CG failure is reported.
            LOG.error("Distributed pruning failed, falling back to pruning with the default index.", e);
            prunedBlocklets = defaultIndex.prune(validSegments, filter, partitionsToPrune);
        }
    } else {
        if (carbonTable.isTransactionalTable()) {
            IndexExprWrapper indexExprWrapper = IndexChooser.getDefaultIndex(getOrCreateCarbonTable(job.getConfiguration()), null);
            IndexUtil.loadIndexes(carbonTable, indexExprWrapper, validSegments);
        }
        prunedBlocklets = defaultIndex.prune(validSegments, filter, partitionsToPrune);
        if (ExplainCollector.enabled()) {
            ExplainCollector.setDefaultIndexPruningBlockHit(getBlockCount(prunedBlocklets));
        }
        // Nothing left to prune further.
        if (prunedBlocklets.isEmpty()) {
            return prunedBlocklets;
        }
        IndexChooser chooser = new IndexChooser(getOrCreateCarbonTable(job.getConfiguration()), isSecondaryIndexPruningEnabled(job.getConfiguration()));
        // Get the available CG indexes and prune further.
        IndexExprWrapper cgIndexExprWrapper = chooser.chooseCGIndex(filter.getResolver());
        if (cgIndexExprWrapper != null) {
            // Prune segments from already pruned blocklets
            IndexUtil.pruneSegments(validSegments, prunedBlocklets);
            List<ExtendedBlocklet> cgPrunedBlocklets = new ArrayList<>();
            // If SI present in cgIndexExprWrapper then set the list of
            // blocklet in segment which are pruned by default index,
            // and this list will be return from SI prune method if segment is not present in SI.
            Map<String, List<ExtendedBlocklet>> segmentsToBlocklet = new HashMap<>();
            for (ExtendedBlocklet extendedBlocklet : prunedBlocklets) {
                segmentsToBlocklet.computeIfAbsent(extendedBlocklet.getSegmentId(), segmentId -> new ArrayList<>()).add(extendedBlocklet);
            }
            for (Segment seg : validSegments) {
                seg.setDefaultIndexPrunedBlocklets(segmentsToBlocklet.get(seg.getSegmentNo()));
            }
            boolean isCGPruneFallback = false;
            // Again prune with CG index.
            try {
                if (distributedCG && indexJob != null) {
                    cgPrunedBlocklets = IndexUtil.executeIndexJob(carbonTable, filter.getResolver(), indexJob, partitionsToPrune, validSegments, invalidSegments, IndexLevel.CG, new ArrayList<>(), job.getConfiguration());
                } else {
                    cgPrunedBlocklets = cgIndexExprWrapper.prune(validSegments, partitionsToPrune);
                }
            } catch (Exception e) {
                isCGPruneFallback = true;
                LOG.error("CG index pruning failed.", e);
            }
            // If CG pruning failed there is nothing to intersect with, so simply keep the
            // prunedBlocklets from the default index.
            if (!isCGPruneFallback) {
                if (isIndexServerContext) {
                    // For all blocklets initialize the detail info so that it can be serialized to driver
                    for (ExtendedBlocklet blocklet : cgPrunedBlocklets) {
                        blocklet.getDetailInfo();
                        blocklet.setCgIndexPresent(true);
                    }
                }
                // since index prune in segment scope,
                // the result need to intersect with previous pruned result
                prunedBlocklets = intersectFilteredBlocklets(carbonTable, prunedBlocklets, cgPrunedBlocklets);
            }
            if (ExplainCollector.enabled()) {
                ExplainCollector.recordCGIndexPruning(IndexWrapperSimpleInfo.fromIndexWrapper(cgIndexExprWrapper), prunedBlocklets.size(), getBlockCount(prunedBlocklets));
            }
        }
        if (prunedBlocklets.isEmpty()) {
            return prunedBlocklets;
        }
        // Now try to prune with FG Index.
        if (isFgIndexPruningEnable(job.getConfiguration()) && indexJob != null) {
            IndexExprWrapper fgIndexExprWrapper = chooser.chooseFGIndex(filter.getResolver());
            List<ExtendedBlocklet> fgPrunedBlocklets;
            if (fgIndexExprWrapper != null) {
                // Prune segments from already pruned blocklets
                IndexUtil.pruneSegments(validSegments, prunedBlocklets);
                // Run the FG pruning through the index job.
                fgPrunedBlocklets = IndexUtil.executeIndexJob(carbonTable, filter.getResolver(), indexJob, partitionsToPrune, validSegments, invalidSegments, fgIndexExprWrapper.getIndexLevel(), new ArrayList<>(), job.getConfiguration());
                // note that the 'fgPrunedBlocklets' has extra index related info compared with
                // 'prunedBlocklets', so the intersection should keep the elements in 'fgPrunedBlocklets'
                prunedBlocklets = intersectFilteredBlocklets(carbonTable, prunedBlocklets, fgPrunedBlocklets);
                ExplainCollector.recordFGIndexPruning(IndexWrapperSimpleInfo.fromIndexWrapper(fgIndexExprWrapper), prunedBlocklets.size(), getBlockCount(prunedBlocklets));
            }
        }
    }
    LOG.info("Finished block pruning ...");
    return prunedBlocklets;
}
Also used : IndexJob(org.apache.carbondata.core.index.IndexJob) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TableIndex(org.apache.carbondata.core.index.TableIndex) PartitionSpec(org.apache.carbondata.core.indexstore.PartitionSpec) InvalidConfigurationException(org.apache.carbondata.core.exception.InvalidConfigurationException) IOException(java.io.IOException) Segment(org.apache.carbondata.core.index.Segment) IndexChooser(org.apache.carbondata.core.index.IndexChooser) List(java.util.List) ArrayList(java.util.ArrayList) IndexFilter(org.apache.carbondata.core.index.IndexFilter) ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet) IndexExprWrapper(org.apache.carbondata.core.index.dev.expr.IndexExprWrapper)

Example 2 with IndexExprWrapper

use of org.apache.carbondata.core.index.dev.expr.IndexExprWrapper in project carbondata by apache.

the class IndexUtil method pruneIndexes.

/**
 * Prune with the index that {@code indexChooser} selects for the given level and filter.
 * <p>
 * If no index is chosen, {@code blocklets} is returned unchanged. Otherwise every split
 * produced by the chosen wrapper for {@code segmentsToLoad} is pruned, the resulting blocklets
 * are tagged with the split's unique id and marked as CG-index-present, and the combined list
 * is passed through the wrapper's {@code pruneBlocklets}.
 *
 * @param table table being pruned
 * @param filterResolverIntf resolved filter used to choose the index and to prune
 * @param segmentsToLoad segments for which the splits are created
 * @param partitions partitions to restrict the pruning to
 * @param blocklets result to hand back when no index is chosen
 * @param indexLevel level of the index to choose
 * @param indexChooser chooser used to pick the index
 * @return the pruned blocklets, or {@code blocklets} when no index applies
 * @throws IOException if pruning fails
 */
static List<ExtendedBlocklet> pruneIndexes(CarbonTable table, FilterResolverIntf filterResolverIntf, List<Segment> segmentsToLoad, List<PartitionSpec> partitions, List<ExtendedBlocklet> blocklets, IndexLevel indexLevel, IndexChooser indexChooser) throws IOException {
    IndexExprWrapper chosenWrapper = indexChooser.chooseIndex(indexLevel, filterResolverIntf);
    // No index fits the filter at this level: hand back the caller's blocklets untouched.
    if (chosenWrapper == null) {
        return blocklets;
    }
    List<ExtendedBlocklet> collected = new ArrayList<>();
    for (IndexInputSplitWrapper splitWrapper : chosenWrapper.toDistributable(segmentsToLoad)) {
        TableIndex tableIndex = IndexStoreManager.getInstance().getIndex(table, splitWrapper.getDistributable().getIndexSchema());
        List<Index> splitIndexes = tableIndex.getTableIndexes(splitWrapper.getDistributable());
        List<ExtendedBlocklet> prunedForSplit;
        if (!table.isTransactionalTable()) {
            // Non-transactional tables are pruned per segment list with the original filter.
            prunedForSplit = new ArrayList<>(tableIndex.prune(segmentsToLoad, new IndexFilter(filterResolverIntf), partitions));
        } else {
            // Transactional tables are pruned per split, with the filter resolved for that split.
            prunedForSplit = new ArrayList<>(tableIndex.prune(splitIndexes, splitWrapper.getDistributable(), chosenWrapper.getFilterResolverIntf(splitWrapper.getUniqueId()), partitions));
        }
        // Initialize the detail info of every blocklet so that it can be serialized to the driver.
        for (ExtendedBlocklet prunedBlocklet : prunedForSplit) {
            prunedBlocklet.getDetailInfo();
            prunedBlocklet.setIndexUniqueId(splitWrapper.getUniqueId());
            prunedBlocklet.setCgIndexPresent(true);
        }
        collected.addAll(prunedForSplit);
    }
    return chosenWrapper.pruneBlocklets(collected);
}
Also used : IndexInputSplitWrapper(org.apache.carbondata.core.index.dev.expr.IndexInputSplitWrapper) ArrayList(java.util.ArrayList) Index(org.apache.carbondata.core.index.dev.Index) ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet) IndexExprWrapper(org.apache.carbondata.core.index.dev.expr.IndexExprWrapper)

Example 3 with IndexExprWrapper

use of org.apache.carbondata.core.index.dev.expr.IndexExprWrapper in project carbondata by apache.

the class CarbonTableInputFormat method getBlockRowCount.

/**
 * Compute the row count per block together with the number of blocks per segment.
 * <p>
 * For an IUD table or an update flow the result holds one entry per valid block plus the
 * block-to-segment mapping. Otherwise only the total row count is stored, under
 * {@code CarbonCommonConstantsInternal.ROW_COUNT}.
 *
 * @param job job whose configuration is used to resolve the table and the segments
 * @param table table to count
 * @param partitions partitions to restrict the count to
 * @param isUpdateFlow forces the per-block counting even if the table has no update details
 * @return the block/row-count mappings
 * @throws IOException if reading segments or indexes fails
 */
public BlockMappingVO getBlockRowCount(Job job, CarbonTable table, List<PartitionSpec> partitions, boolean isUpdateFlow) throws IOException {
    // A normal query goes through CarbonInputFormat#getPrunedBlocklets, which initializes the
    // pruning info of the queried table. A count star query without filter takes a different
    // plan, so no pruning info exists; pruning with the default index (with a null filter)
    // would then fail while setting the pruning info. Such a query has no useful
    // block/blocklet pruning information anyway (nothing is pruned), so the explain
    // collector is disabled here.
    ExplainCollector.remove();
    AbsoluteTableIdentifier identifier = table.getAbsoluteTableIdentifier();
    ReadCommittedScope readCommittedScope = getReadCommitted(job, identifier);
    LoadMetadataDetails[] loadMetadataDetails = readCommittedScope.getSegmentList();
    SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(table, loadMetadataDetails);
    SegmentStatusManager segmentStatusManager = new SegmentStatusManager(identifier, readCommittedScope.getConfiguration());
    SegmentStatusManager.ValidAndInvalidSegmentsInfo allSegments = segmentStatusManager.getValidAndInvalidSegments(table.isMV(), loadMetadataDetails, readCommittedScope);
    Map<String, Long> blockRowCountMapping = new HashMap<>();
    Map<String, Long> segmentAndBlockCountMapping = new HashMap<>();
    Map<String, String> blockToSegmentMapping = new HashMap<>();
    // TODO: currently only batch segment is supported, add support for streaming table
    List<Segment> filteredSegment = getFilteredSegment(job, allSegments.getValidSegments(), false, readCommittedScope);
    boolean isIUDTable = updateStatusManager.getUpdateStatusDetails().length != 0;
    // The select * flow (getSplits()) clears the segmentMap when a segment needs refreshing;
    // the select count(*) flow has to do the same. For a NonTransactional table one reason for
    // a refresh is: the SDK wrote a set of files with a UUID and can write again with the same
    // UUID, so the count must reflect the latest file content after refreshing the segment.
    List<String> toBeCleanedSegments = new ArrayList<>();
    for (Segment candidate : filteredSegment) {
        if (IndexStoreManager.getInstance().getTableSegmentRefresher(getOrCreateCarbonTable(job.getConfiguration())).isRefreshNeeded(candidate, SegmentUpdateStatusManager.getInvalidTimestampRange(candidate.getLoadMetadataDetails()))) {
            toBeCleanedSegments.add(candidate.getSegmentNo());
        }
    }
    // Invalid segments must also be dropped from the segment index.
    for (Segment invalidSegment : allSegments.getInvalidSegments()) {
        toBeCleanedSegments.add(invalidSegment.getSegmentNo());
    }
    if (!toBeCleanedSegments.isEmpty()) {
        IndexStoreManager.getInstance().clearInvalidSegments(getOrCreateCarbonTable(job.getConfiguration()), toBeCleanedSegments);
    }
    IndexExprWrapper indexExprWrapper = IndexChooser.getDefaultIndex(getOrCreateCarbonTable(job.getConfiguration()), null);
    IndexUtil.loadIndexes(table, indexExprWrapper, filteredSegment);
    if (isIUDTable || isUpdateFlow) {
        Map<String, Long> blockletToRowCountMap = new HashMap<>();
        if (CarbonProperties.getInstance().isDistributedPruningEnabled(table.getDatabaseName(), table.getTableName())) {
            try {
                List<ExtendedBlocklet> distributedBlocklets = getDistributedBlockRowCount(table, partitions, filteredSegment, allSegments.getInvalidSegments(), toBeCleanedSegments, job.getConfiguration());
                for (ExtendedBlocklet distributedBlocklet : distributedBlocklets) {
                    // Normalize the separators, then keep only the file name of the block.
                    String normalizedPath = distributedBlocklet.getFilePath().replace("\\", "/");
                    String fileName = normalizedPath.substring(normalizedPath.lastIndexOf("/") + 1);
                    blockletToRowCountMap.put(distributedBlocklet.getSegmentId() + "," + fileName, distributedBlocklet.getRowCount());
                }
            } catch (Exception e) {
                // If fallback is disabled, propagate the failure; otherwise count with the
                // default index instead.
                if (CarbonProperties.getInstance().isFallBackDisabled()) {
                    throw e;
                }
                TableIndex fallbackIndex = IndexStoreManager.getInstance().getDefaultIndex(table);
                blockletToRowCountMap.putAll(fallbackIndex.getBlockRowCount(filteredSegment, partitions, fallbackIndex));
            }
        } else {
            TableIndex localIndex = IndexStoreManager.getInstance().getDefaultIndex(table);
            blockletToRowCountMap.putAll(localIndex.getBlockRowCount(filteredSegment, partitions, localIndex));
        }
        // The key is (segmentId + "," + blockletPath) and the value is the row count of that blocklet.
        for (Map.Entry<String, Long> blockletEntry : blockletToRowCountMap.entrySet()) {
            String[] keyParts = blockletEntry.getKey().split(",", 2);
            String segmentId = keyParts[0];
            String blockName = keyParts[1];
            long rowCount = blockletEntry.getValue();
            String blockKey = CarbonUpdateUtil.getSegmentBlockNameKey(segmentId, blockName, table.isHivePartitionTable());
            // Rows of a block that is marked invalid are not counted.
            SegmentUpdateDetails blockDetails = updateStatusManager.getDetailsForABlock(blockKey);
            if (blockDetails != null && CarbonUpdateUtil.isBlockInvalid(blockDetails.getSegmentStatus())) {
                continue;
            }
            // A block seen for the first time adds one to the block count of its segment.
            if (!blockRowCountMapping.containsKey(blockKey)) {
                segmentAndBlockCountMapping.merge(segmentId, 1L, Long::sum);
            }
            blockToSegmentMapping.put(blockKey, segmentId);
            blockRowCountMapping.merge(blockKey, rowCount, Long::sum);
        }
    } else {
        long totalRowCount;
        if (CarbonProperties.getInstance().isDistributedPruningEnabled(table.getDatabaseName(), table.getTableName())) {
            totalRowCount = getDistributedCount(table, partitions, filteredSegment, job.getConfiguration());
        } else {
            TableIndex localIndex = IndexStoreManager.getInstance().getDefaultIndex(table);
            totalRowCount = localIndex.getRowCount(filteredSegment, partitions, localIndex);
        }
        blockRowCountMapping.put(CarbonCommonConstantsInternal.ROW_COUNT, totalRowCount);
    }
    BlockMappingVO result = new BlockMappingVO(blockRowCountMapping, segmentAndBlockCountMapping);
    result.setBlockToSegmentMapping(blockToSegmentMapping);
    return result;
}
Also used : BlockMappingVO(org.apache.carbondata.core.mutate.data.BlockMappingVO) HashMap(java.util.HashMap) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) ArrayList(java.util.ArrayList) Segment(org.apache.carbondata.core.index.Segment) ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet) IndexExprWrapper(org.apache.carbondata.core.index.dev.expr.IndexExprWrapper) SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) TableIndex(org.apache.carbondata.core.index.TableIndex) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) DeprecatedFeatureException(org.apache.carbondata.common.exceptions.DeprecatedFeatureException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) SegmentUpdateDetails(org.apache.carbondata.core.mutate.SegmentUpdateDetails) ReadCommittedScope(org.apache.carbondata.core.readcommitter.ReadCommittedScope) TableStatusReadCommittedScope(org.apache.carbondata.core.readcommitter.TableStatusReadCommittedScope) LatestFilesReadCommittedScope(org.apache.carbondata.core.readcommitter.LatestFilesReadCommittedScope) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) Map(java.util.Map) HashMap(java.util.HashMap)

Aggregations

ArrayList (java.util.ArrayList)3 IndexExprWrapper (org.apache.carbondata.core.index.dev.expr.IndexExprWrapper)3 ExtendedBlocklet (org.apache.carbondata.core.indexstore.ExtendedBlocklet)3 IOException (java.io.IOException)2 HashMap (java.util.HashMap)2 Segment (org.apache.carbondata.core.index.Segment)2 TableIndex (org.apache.carbondata.core.index.TableIndex)2 List (java.util.List)1 Map (java.util.Map)1 ExecutionException (java.util.concurrent.ExecutionException)1 DeprecatedFeatureException (org.apache.carbondata.common.exceptions.DeprecatedFeatureException)1 InvalidConfigurationException (org.apache.carbondata.core.exception.InvalidConfigurationException)1 IndexChooser (org.apache.carbondata.core.index.IndexChooser)1 IndexFilter (org.apache.carbondata.core.index.IndexFilter)1 IndexJob (org.apache.carbondata.core.index.IndexJob)1 Index (org.apache.carbondata.core.index.dev.Index)1 IndexInputSplitWrapper (org.apache.carbondata.core.index.dev.expr.IndexInputSplitWrapper)1 PartitionSpec (org.apache.carbondata.core.indexstore.PartitionSpec)1 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)1 SegmentUpdateDetails (org.apache.carbondata.core.mutate.SegmentUpdateDetails)1