Example use of org.apache.carbondata.core.index.IndexChooser in the Apache CarbonData project.
Shown below: the method getPrunedBlocklets of the class CarbonInputFormat.
/**
 * Prunes the blocklets of the given segments using the filter expression and the
 * available indexes.
 *
 * <p>When distributed pruning is enabled for the table and this call is not already
 * running in the index-server context, pruning is delegated to
 * {@code getDistributedSplit}; if that fails and fallback is not disabled, the default
 * index is used on the driver instead. Otherwise the blocklets are first pruned with the
 * default blocklet index, then with a CG index (if one is chosen for the filter), and
 * finally with an FG index (if FG pruning is enabled and an index job is configured).
 *
 * <p>Note that the early returns taken when no blocklet survives a pruning stage skip
 * the final "Finished block pruning ..." log line.
 *
 * @param job job context; its configuration supplies the index job, the partitions to
 *            prune and the pruning flags
 * @param carbonTable table whose blocklets are pruned
 * @param filter filter to prune with; when {@code null}, an
 *               {@code IndexFilter(carbonTable, null)} is used
 * @param validSegments segments to prune; NOTE(review): passed to
 *                      {@code IndexUtil.pruneSegments} and updated through
 *                      {@code setDefaultIndexPrunedBlocklets}, so callers should expect
 *                      it to be modified - confirm
 * @param invalidSegments forwarded to the distributed pruning / index job calls
 * @param segmentsToBeRefreshed forwarded to the distributed pruning call
 * @return the blocklets remaining after all applicable pruning stages
 * @throws IOException if pruning fails and no fallback applies
 */
public List<ExtendedBlocklet> getPrunedBlocklets(JobContext job, CarbonTable carbonTable,
    IndexFilter filter, List<Segment> validSegments, List<Segment> invalidSegments,
    List<String> segmentsToBeRefreshed) throws IOException {
  ExplainCollector.addPruningInfo(carbonTable.getTableName());
  filter = filter == null ? new IndexFilter(carbonTable, null) : filter;
  ExplainCollector.setFilterStatement(
      filter.getExpression() == null ? "none" : filter.getExpression().getStatement());
  boolean distributedCG = Boolean.parseBoolean(CarbonProperties.getInstance().getProperty(
      CarbonCommonConstants.USE_DISTRIBUTED_INDEX,
      CarbonCommonConstants.USE_DISTRIBUTED_INDEX_DEFAULT));
  IndexJob indexJob = IndexUtil.getIndexJob(job.getConfiguration());
  List<PartitionSpec> partitionsToPrune = getPartitionsToPrune(job.getConfiguration());
  // First prune using default index on driver side.
  TableIndex defaultIndex = IndexStoreManager.getInstance().getDefaultIndex(carbonTable);
  List<ExtendedBlocklet> prunedBlocklets;
  // This is to log the event, so user will know what is happening by seeing logs.
  LOG.info("Started block pruning ...");
  boolean isDistributedPruningEnabled = CarbonProperties.getInstance()
      .isDistributedPruningEnabled(carbonTable.getDatabaseName(), carbonTable.getTableName());
  boolean isIndexServerContext =
      job.getConfiguration().get("isIndexServerContext", "false").equals("true");
  if (isDistributedPruningEnabled && !isIndexServerContext) {
    try {
      prunedBlocklets = getDistributedSplit(carbonTable, filter.getResolver(),
          partitionsToPrune, validSegments, invalidSegments, segmentsToBeRefreshed, false,
          job.getConfiguration(), filter.getMissingSISegments());
    } catch (Exception e) {
      // If fallback is disabled, propagate the failure directly; otherwise fall back to
      // pruning with the default index on the driver.
      if (CarbonProperties.getInstance().isFallBackDisabled()) {
        throw e;
      }
      // Do not lose the cause of the fallback; logged the same way as the CG fallback.
      LOG.error("Distributed pruning failed, falling back to default index pruning.", e);
      prunedBlocklets = defaultIndex.prune(validSegments, filter, partitionsToPrune);
    }
  } else {
    if (carbonTable.isTransactionalTable()) {
      IndexExprWrapper indexExprWrapper =
          IndexChooser.getDefaultIndex(getOrCreateCarbonTable(job.getConfiguration()), null);
      IndexUtil.loadIndexes(carbonTable, indexExprWrapper, validSegments);
    }
    prunedBlocklets = defaultIndex.prune(validSegments, filter, partitionsToPrune);
    if (ExplainCollector.enabled()) {
      ExplainCollector.setDefaultIndexPruningBlockHit(getBlockCount(prunedBlocklets));
    }
    if (prunedBlocklets.isEmpty()) {
      return prunedBlocklets;
    }
    IndexChooser chooser = new IndexChooser(getOrCreateCarbonTable(job.getConfiguration()),
        isSecondaryIndexPruningEnabled(job.getConfiguration()));
    // Get the available CG indexes and prune further.
    IndexExprWrapper cgIndexExprWrapper = chooser.chooseCGIndex(filter.getResolver());
    if (cgIndexExprWrapper != null) {
      // Prune segments from already pruned blocklets
      IndexUtil.pruneSegments(validSegments, prunedBlocklets);
      List<ExtendedBlocklet> cgPrunedBlocklets = new ArrayList<>();
      // If SI present in cgIndexExprWrapper then set the list of
      // blocklet in segment which are pruned by default index,
      // and this list will be return from SI prune method if segment is not present in SI.
      Map<String, List<ExtendedBlocklet>> segmentsToBlocklet = new HashMap<>();
      for (ExtendedBlocklet extendedBlocklet : prunedBlocklets) {
        segmentsToBlocklet
            .computeIfAbsent(extendedBlocklet.getSegmentId(), segmentId -> new ArrayList<>())
            .add(extendedBlocklet);
      }
      for (Segment seg : validSegments) {
        seg.setDefaultIndexPrunedBlocklets(segmentsToBlocklet.get(seg.getSegmentNo()));
      }
      boolean isCGPruneFallback = false;
      // Again prune with CG index.
      try {
        if (distributedCG && indexJob != null) {
          cgPrunedBlocklets = IndexUtil.executeIndexJob(carbonTable, filter.getResolver(),
              indexJob, partitionsToPrune, validSegments, invalidSegments, IndexLevel.CG,
              new ArrayList<>(), job.getConfiguration());
        } else {
          cgPrunedBlocklets = cgIndexExprWrapper.prune(validSegments, partitionsToPrune);
        }
      } catch (Exception e) {
        isCGPruneFallback = true;
        LOG.error("CG index pruning failed.", e);
      }
      // If isCGPruneFallback is true, CG index pruning failed,
      // hence no need to do intersect and simply pass the prunedBlocklets from default index
      if (!isCGPruneFallback) {
        if (isIndexServerContext) {
          // For all blocklets initialize the detail info so that it can be serialized to driver
          for (ExtendedBlocklet blocklet : cgPrunedBlocklets) {
            blocklet.getDetailInfo();
            blocklet.setCgIndexPresent(true);
          }
        }
        // since the index prunes in segment scope,
        // the result need to intersect with previous pruned result
        prunedBlocklets =
            intersectFilteredBlocklets(carbonTable, prunedBlocklets, cgPrunedBlocklets);
      }
      if (ExplainCollector.enabled()) {
        ExplainCollector.recordCGIndexPruning(
            IndexWrapperSimpleInfo.fromIndexWrapper(cgIndexExprWrapper),
            prunedBlocklets.size(), getBlockCount(prunedBlocklets));
      }
    }
    if (prunedBlocklets.isEmpty()) {
      return prunedBlocklets;
    }
    // Now try to prune with FG Index.
    if (isFgIndexPruningEnable(job.getConfiguration()) && indexJob != null) {
      IndexExprWrapper fgIndexExprWrapper = chooser.chooseFGIndex(filter.getResolver());
      if (fgIndexExprWrapper != null) {
        // Prune segments from already pruned blocklets
        IndexUtil.pruneSegments(validSegments, prunedBlocklets);
        // Run the FG index pruning through the index job
        List<ExtendedBlocklet> fgPrunedBlocklets = IndexUtil.executeIndexJob(carbonTable,
            filter.getResolver(), indexJob, partitionsToPrune, validSegments, invalidSegments,
            fgIndexExprWrapper.getIndexLevel(), new ArrayList<>(), job.getConfiguration());
        // note that the 'fgPrunedBlocklets' has extra index related info compared with
        // 'prunedBlocklets', so the intersection should keep the elements in 'fgPrunedBlocklets'
        prunedBlocklets =
            intersectFilteredBlocklets(carbonTable, prunedBlocklets, fgPrunedBlocklets);
        // Guarded like the CG recording above so the block count is only computed when
        // the explain collector is actually enabled.
        if (ExplainCollector.enabled()) {
          ExplainCollector.recordFGIndexPruning(
              IndexWrapperSimpleInfo.fromIndexWrapper(fgIndexExprWrapper),
              prunedBlocklets.size(), getBlockCount(prunedBlocklets));
        }
      }
    }
  }
  LOG.info("Finished block pruning ...");
  return prunedBlocklets;
}
Aggregations