Search in sources :

Example 1 with IndexJob

use of org.apache.carbondata.core.index.IndexJob in project carbondata by apache.

the class CarbonInputFormat method getPrunedBlocklets.

/**
 * Prune the blocklets using the filter expression with the available indexes.
 * Blocklets are first pruned with the default blocklet index, then pruned further with the
 * CG index and finally with the FG index, when such indexes are chosen for the filter.
 *
 * <p>When distributed pruning is enabled for the table and this call is not running in the
 * index server context, pruning is delegated to {@code getDistributedSplit}. If that call fails
 * and fallback is not disabled, the failure is logged and the default index prunes instead.
 *
 * @param job job context whose configuration drives the pruning
 * @param carbonTable table whose blocklets are pruned
 * @param filter filter to prune with; when {@code null},
 *               {@code new IndexFilter(carbonTable, null)} is used instead
 * @param validSegments segments to prune; also handed to {@code IndexUtil.pruneSegments}
 *                      before CG and FG pruning
 * @param invalidSegments segments passed through to the distributed pruning and index jobs
 * @param segmentsToBeRefreshed segment identifiers passed through to the distributed pruning
 * @return the blocklets remaining after all applicable pruning steps
 * @throws IOException declared by this method for failures raised by the pruning calls
 */
public List<ExtendedBlocklet> getPrunedBlocklets(JobContext job, CarbonTable carbonTable, IndexFilter filter, List<Segment> validSegments, List<Segment> invalidSegments, List<String> segmentsToBeRefreshed) throws IOException {
    ExplainCollector.addPruningInfo(carbonTable.getTableName());
    filter = filter == null ? new IndexFilter(carbonTable, null) : filter;
    ExplainCollector.setFilterStatement(filter.getExpression() == null ? "none" : filter.getExpression().getStatement());
    boolean distributedCG = Boolean.parseBoolean(CarbonProperties.getInstance().getProperty(CarbonCommonConstants.USE_DISTRIBUTED_INDEX, CarbonCommonConstants.USE_DISTRIBUTED_INDEX_DEFAULT));
    IndexJob indexJob = IndexUtil.getIndexJob(job.getConfiguration());
    List<PartitionSpec> partitionsToPrune = getPartitionsToPrune(job.getConfiguration());
    // First prune using default index on driver side.
    TableIndex defaultIndex = IndexStoreManager.getInstance().getDefaultIndex(carbonTable);
    List<ExtendedBlocklet> prunedBlocklets;
    // This is to log the event, so user will know what is happening by seeing logs.
    LOG.info("Started block pruning ...");
    boolean isDistributedPruningEnabled = CarbonProperties.getInstance().isDistributedPruningEnabled(carbonTable.getDatabaseName(), carbonTable.getTableName());
    boolean isIndexServerContext = job.getConfiguration().get("isIndexServerContext", "false").equals("true");
    if (isDistributedPruningEnabled && !isIndexServerContext) {
        try {
            prunedBlocklets = getDistributedSplit(carbonTable, filter.getResolver(), partitionsToPrune, validSegments, invalidSegments, segmentsToBeRefreshed, false, job.getConfiguration(), filter.getMissingSISegments());
        } catch (Exception e) {
            // If fallback is disabled then rethrow the exception directly,
            // otherwise fall back to pruning with the default index.
            if (CarbonProperties.getInstance().isFallBackDisabled()) {
                throw e;
            }
            // Do not swallow the cause silently: record why the fallback was taken.
            LOG.error("Distributed pruning failed. Falling back to default index pruning.", e);
            prunedBlocklets = defaultIndex.prune(validSegments, filter, partitionsToPrune);
        }
    } else {
        if (carbonTable.isTransactionalTable()) {
            IndexExprWrapper indexExprWrapper = IndexChooser.getDefaultIndex(getOrCreateCarbonTable(job.getConfiguration()), null);
            IndexUtil.loadIndexes(carbonTable, indexExprWrapper, validSegments);
        }
        prunedBlocklets = defaultIndex.prune(validSegments, filter, partitionsToPrune);
        if (ExplainCollector.enabled()) {
            ExplainCollector.setDefaultIndexPruningBlockHit(getBlockCount(prunedBlocklets));
        }
        // Nothing left to prune further.
        if (prunedBlocklets.isEmpty()) {
            return prunedBlocklets;
        }
        IndexChooser chooser = new IndexChooser(getOrCreateCarbonTable(job.getConfiguration()), isSecondaryIndexPruningEnabled(job.getConfiguration()));
        // Get the available CG indexes and prune further.
        IndexExprWrapper cgIndexExprWrapper = chooser.chooseCGIndex(filter.getResolver());
        if (cgIndexExprWrapper != null) {
            // Prune segments from already pruned blocklets
            IndexUtil.pruneSegments(validSegments, prunedBlocklets);
            List<ExtendedBlocklet> cgPrunedBlocklets = new ArrayList<>();
            // If SI present in cgIndexExprWrapper then set the list of
            // blocklet in segment which are pruned by default index,
            // and this list will be return from SI prune method if segment is not present in SI.
            Map<String, List<ExtendedBlocklet>> segmentsToBlocklet = new HashMap<>();
            for (ExtendedBlocklet extendedBlocklet : prunedBlocklets) {
                segmentsToBlocklet.computeIfAbsent(extendedBlocklet.getSegmentId(), segmentId -> new ArrayList<>()).add(extendedBlocklet);
            }
            for (Segment seg : validSegments) {
                seg.setDefaultIndexPrunedBlocklets(segmentsToBlocklet.get(seg.getSegmentNo()));
            }
            boolean isCGPruneFallback = false;
            // Again prune with CG index.
            try {
                if (distributedCG && indexJob != null) {
                    cgPrunedBlocklets = IndexUtil.executeIndexJob(carbonTable, filter.getResolver(), indexJob, partitionsToPrune, validSegments, invalidSegments, IndexLevel.CG, new ArrayList<>(), job.getConfiguration());
                } else {
                    cgPrunedBlocklets = cgIndexExprWrapper.prune(validSegments, partitionsToPrune);
                }
            } catch (Exception e) {
                isCGPruneFallback = true;
                LOG.error("CG index pruning failed.", e);
            }
            // If isCGPruneFallback is true, CG index pruning failed,
            // hence no need to do intersect and simply pass the prunedBlocklets from default index
            if (!isCGPruneFallback) {
                if (isIndexServerContext) {
                    // For all blocklets initialize the detail info so that it can be serialized to driver
                    for (ExtendedBlocklet blocklet : cgPrunedBlocklets) {
                        blocklet.getDetailInfo();
                        blocklet.setCgIndexPresent(true);
                    }
                }
                // since the index prunes in segment scope,
                // the result needs to be intersected with the previous pruned result
                prunedBlocklets = intersectFilteredBlocklets(carbonTable, prunedBlocklets, cgPrunedBlocklets);
            }
            if (ExplainCollector.enabled()) {
                ExplainCollector.recordCGIndexPruning(IndexWrapperSimpleInfo.fromIndexWrapper(cgIndexExprWrapper), prunedBlocklets.size(), getBlockCount(prunedBlocklets));
            }
        }
        if (prunedBlocklets.isEmpty()) {
            return prunedBlocklets;
        }
        // Now try to prune with FG Index.
        if (isFgIndexPruningEnable(job.getConfiguration()) && indexJob != null) {
            IndexExprWrapper fgIndexExprWrapper = chooser.chooseFGIndex(filter.getResolver());
            if (fgIndexExprWrapper != null) {
                // Prune segments from already pruned blocklets
                IndexUtil.pruneSegments(validSegments, prunedBlocklets);
                // Run the index job at the level reported by the chosen FG index wrapper.
                List<ExtendedBlocklet> fgPrunedBlocklets = IndexUtil.executeIndexJob(carbonTable, filter.getResolver(), indexJob, partitionsToPrune, validSegments, invalidSegments, fgIndexExprWrapper.getIndexLevel(), new ArrayList<>(), job.getConfiguration());
                // note that the 'fgPrunedBlocklets' has extra index related info compared with
                // 'prunedBlocklets', so the intersection should keep the elements in 'fgPrunedBlocklets'
                prunedBlocklets = intersectFilteredBlocklets(carbonTable, prunedBlocklets, fgPrunedBlocklets);
                ExplainCollector.recordFGIndexPruning(IndexWrapperSimpleInfo.fromIndexWrapper(fgIndexExprWrapper), prunedBlocklets.size(), getBlockCount(prunedBlocklets));
            }
        }
    }
    LOG.info("Finished block pruning ...");
    return prunedBlocklets;
}
Also used : IndexJob(org.apache.carbondata.core.index.IndexJob) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TableIndex(org.apache.carbondata.core.index.TableIndex) PartitionSpec(org.apache.carbondata.core.indexstore.PartitionSpec) InvalidConfigurationException(org.apache.carbondata.core.exception.InvalidConfigurationException) IOException(java.io.IOException) Segment(org.apache.carbondata.core.index.Segment) IndexChooser(org.apache.carbondata.core.index.IndexChooser) List(java.util.List) ArrayList(java.util.ArrayList) IndexFilter(org.apache.carbondata.core.index.IndexFilter) ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet) IndexExprWrapper(org.apache.carbondata.core.index.dev.expr.IndexExprWrapper)

Example 2 with IndexJob

use of org.apache.carbondata.core.index.IndexJob in project carbondata by apache.

the class CarbonInputFormat method getDistributedCount.

/**
 * Gets the count for the table by executing a distributed index job. When that attempt
 * throws an exception and fallback is not disabled, the embedded job is executed instead
 * to get the count.
 *
 * <p>Note: a {@code null} result from {@code IndexUtil.createIndexJob} is reported with an
 * {@link ExceptionInInitializerError}. That is an {@link Error}, not an {@link Exception}, so it
 * is not handled by the fallback below and propagates to the caller.
 *
 * @param table table whose count is requested
 * @param partitionNames partitions handed to the index input format
 * @param validSegments segments handed to the index input format
 * @param configuration configuration passed to the count job
 * @return the value returned by the count job that was executed
 */
Long getDistributedCount(CarbonTable table, List<PartitionSpec> partitionNames, List<Segment> validSegments, Configuration configuration) {
    IndexInputFormat countInputFormat = new IndexInputFormat(table, null, validSegments, new ArrayList<>(), partitionNames, false, null, false, false);
    countInputFormat.setIsWriteToFile(false);
    try {
        IndexJob distributedJob = (IndexJob) IndexUtil.createIndexJob(IndexUtil.DISTRIBUTED_JOB_NAME);
        if (distributedJob != null) {
            return distributedJob.executeCountJob(countInputFormat, configuration);
        }
        throw new ExceptionInInitializerError("Unable to create index job");
    } catch (Exception failure) {
        if (!CarbonProperties.getInstance().isFallBackDisabled()) {
            // Fallback is allowed: log the cause and retry with the embedded job.
            LOG.error("Failed to get count from index server. Initializing fallback", failure);
            IndexJob embeddedJob = IndexUtil.getEmbeddedJob();
            return embeddedJob.executeCountJob(countInputFormat, configuration);
        }
        // Fallback is disabled: surface the original failure to the caller.
        LOG.error("Fallback is disabled");
        throw failure;
    }
}
Also used : IndexJob(org.apache.carbondata.core.index.IndexJob) IndexInputFormat(org.apache.carbondata.core.index.IndexInputFormat) InvalidConfigurationException(org.apache.carbondata.core.exception.InvalidConfigurationException) IOException(java.io.IOException)

Aggregations

IOException (java.io.IOException)2 InvalidConfigurationException (org.apache.carbondata.core.exception.InvalidConfigurationException)2 IndexJob (org.apache.carbondata.core.index.IndexJob)2 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 List (java.util.List)1 IndexChooser (org.apache.carbondata.core.index.IndexChooser)1 IndexFilter (org.apache.carbondata.core.index.IndexFilter)1 IndexInputFormat (org.apache.carbondata.core.index.IndexInputFormat)1 Segment (org.apache.carbondata.core.index.Segment)1 TableIndex (org.apache.carbondata.core.index.TableIndex)1 IndexExprWrapper (org.apache.carbondata.core.index.dev.expr.IndexExprWrapper)1 ExtendedBlocklet (org.apache.carbondata.core.indexstore.ExtendedBlocklet)1 PartitionSpec (org.apache.carbondata.core.indexstore.PartitionSpec)1