use of org.apache.carbondata.core.index.TableIndex in project carbondata by apache.
the class CarbonInputFormat method getPrunedBlocklets.
/**
 * Prune the blocklets using the filter expression with the available indexes.
 * Blocklets are first pruned with the default blocklet index, then with CG and FG indexes.
 */
public List<ExtendedBlocklet> getPrunedBlocklets(JobContext job, CarbonTable carbonTable,
    IndexFilter filter, List<Segment> validSegments, List<Segment> invalidSegments,
    List<String> segmentsToBeRefreshed) throws IOException {
  ExplainCollector.addPruningInfo(carbonTable.getTableName());
  filter = filter == null ? new IndexFilter(carbonTable, null) : filter;
  ExplainCollector.setFilterStatement(
      filter.getExpression() == null ? "none" : filter.getExpression().getStatement());
  boolean distributedCG = Boolean.parseBoolean(CarbonProperties.getInstance()
      .getProperty(CarbonCommonConstants.USE_DISTRIBUTED_INDEX,
          CarbonCommonConstants.USE_DISTRIBUTED_INDEX_DEFAULT));
  IndexJob indexJob = IndexUtil.getIndexJob(job.getConfiguration());
  List<PartitionSpec> partitionsToPrune = getPartitionsToPrune(job.getConfiguration());
  // First prune using the default index on the driver side.
  TableIndex defaultIndex = IndexStoreManager.getInstance().getDefaultIndex(carbonTable);
  List<ExtendedBlocklet> prunedBlocklets;
  // Log the event so the user can follow the progress in the logs.
  LOG.info("Started block pruning ...");
  boolean isDistributedPruningEnabled = CarbonProperties.getInstance()
      .isDistributedPruningEnabled(carbonTable.getDatabaseName(), carbonTable.getTableName());
  boolean isIndexServerContext =
      job.getConfiguration().get("isIndexServerContext", "false").equals("true");
  if (isDistributedPruningEnabled && !isIndexServerContext) {
    try {
      prunedBlocklets = getDistributedSplit(carbonTable, filter.getResolver(), partitionsToPrune,
          validSegments, invalidSegments, segmentsToBeRefreshed, false, job.getConfiguration(),
          filter.getMissingSISegments());
    } catch (Exception e) {
      // If fallback is disabled, rethrow; otherwise fall back to driver-side pruning.
      if (CarbonProperties.getInstance().isFallBackDisabled()) {
        throw e;
      }
      prunedBlocklets = defaultIndex.prune(validSegments, filter, partitionsToPrune);
    }
  } else {
    if (carbonTable.isTransactionalTable()) {
      IndexExprWrapper indexExprWrapper =
          IndexChooser.getDefaultIndex(getOrCreateCarbonTable(job.getConfiguration()), null);
      IndexUtil.loadIndexes(carbonTable, indexExprWrapper, validSegments);
    }
    prunedBlocklets = defaultIndex.prune(validSegments, filter, partitionsToPrune);
    if (ExplainCollector.enabled()) {
      ExplainCollector.setDefaultIndexPruningBlockHit(getBlockCount(prunedBlocklets));
    }
    if (prunedBlocklets.size() == 0) {
      return prunedBlocklets;
    }
    IndexChooser chooser = new IndexChooser(getOrCreateCarbonTable(job.getConfiguration()),
        isSecondaryIndexPruningEnabled(job.getConfiguration()));
    // Get the available CG indexes and prune further.
    IndexExprWrapper cgIndexExprWrapper = chooser.chooseCGIndex(filter.getResolver());
    if (cgIndexExprWrapper != null) {
      // Prune segments from the already pruned blocklets.
      IndexUtil.pruneSegments(validSegments, prunedBlocklets);
      List<ExtendedBlocklet> cgPrunedBlocklets = new ArrayList<>();
      // If an SI is present in cgIndexExprWrapper, record per segment the list of
      // blocklets pruned by the default index; this list will be returned from the
      // SI prune method if the segment is not present in the SI.
      Map<String, List<ExtendedBlocklet>> segmentsToBlocklet = new HashMap<>();
      for (ExtendedBlocklet extendedBlocklet : prunedBlocklets) {
        List<ExtendedBlocklet> extendedBlockletList = segmentsToBlocklet
            .getOrDefault(extendedBlocklet.getSegmentId(), new ArrayList<>());
        extendedBlockletList.add(extendedBlocklet);
        segmentsToBlocklet.put(extendedBlocklet.getSegmentId(), extendedBlockletList);
      }
      for (Segment seg : validSegments) {
        seg.setDefaultIndexPrunedBlocklets(segmentsToBlocklet.get(seg.getSegmentNo()));
      }
      boolean isCGPruneFallback = false;
      // Prune again with the CG index.
      try {
        if (distributedCG && indexJob != null) {
          cgPrunedBlocklets = IndexUtil.executeIndexJob(carbonTable, filter.getResolver(),
              indexJob, partitionsToPrune, validSegments, invalidSegments, IndexLevel.CG,
              new ArrayList<>(), job.getConfiguration());
        } else {
          cgPrunedBlocklets = cgIndexExprWrapper.prune(validSegments, partitionsToPrune);
        }
      } catch (Exception e) {
        isCGPruneFallback = true;
        LOG.error("CG index pruning failed.", e);
      }
      // If CG pruning failed, there is no need to intersect; simply keep the
      // prunedBlocklets from the default index.
      if (!isCGPruneFallback) {
        if (isIndexServerContext) {
          // Initialize the detail info for all blocklets so they can be serialized to the driver.
          for (ExtendedBlocklet blocklet : cgPrunedBlocklets) {
            blocklet.getDetailInfo();
            blocklet.setCgIndexPresent(true);
          }
        }
        // Since the CG index prunes at segment scope, the result needs to be
        // intersected with the previously pruned result.
        prunedBlocklets = intersectFilteredBlocklets(carbonTable, prunedBlocklets, cgPrunedBlocklets);
      }
      if (ExplainCollector.enabled()) {
        ExplainCollector.recordCGIndexPruning(
            IndexWrapperSimpleInfo.fromIndexWrapper(cgIndexExprWrapper),
            prunedBlocklets.size(), getBlockCount(prunedBlocklets));
      }
    }
    if (prunedBlocklets.size() == 0) {
      return prunedBlocklets;
    }
    // Now try to prune with the FG index.
    if (isFgIndexPruningEnable(job.getConfiguration()) && indexJob != null) {
      IndexExprWrapper fgIndexExprWrapper = chooser.chooseFGIndex(filter.getResolver());
      List<ExtendedBlocklet> fgPrunedBlocklets;
      if (fgIndexExprWrapper != null) {
        // Prune segments from the already pruned blocklets.
        IndexUtil.pruneSegments(validSegments, prunedBlocklets);
        fgPrunedBlocklets = IndexUtil.executeIndexJob(carbonTable, filter.getResolver(),
            indexJob, partitionsToPrune, validSegments, invalidSegments,
            fgIndexExprWrapper.getIndexLevel(), new ArrayList<>(), job.getConfiguration());
        // Note that 'fgPrunedBlocklets' carries extra index-related info compared with
        // 'prunedBlocklets', so the intersection should keep the elements of 'fgPrunedBlocklets'.
        prunedBlocklets = intersectFilteredBlocklets(carbonTable, prunedBlocklets, fgPrunedBlocklets);
        ExplainCollector.recordFGIndexPruning(
            IndexWrapperSimpleInfo.fromIndexWrapper(fgIndexExprWrapper),
            prunedBlocklets.size(), getBlockCount(prunedBlocklets));
      }
    }
  }
  LOG.info("Finished block pruning ...");
  return prunedBlocklets;
}
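To make the pruning cascade concrete, below is a minimal, self-contained sketch. It is not CarbonData API: the class name, the "segment/blocklet" string keys, and the intersect helper are illustrative assumptions. It shows the shape of the flow above, where each stage only narrows the previous result and the intersection keeps the later stage's elements (which may carry extra index info, as the FG comment notes).

import java.util.*;
import java.util.stream.*;

// Hypothetical model of the default -> CG -> FG pruning cascade.
public class PruningCascadeSketch {
  // Keep the elements of 'next' that also survived the previous stage.
  static List<String> intersect(List<String> previous, List<String> next) {
    Set<String> keep = new HashSet<>(previous);
    return next.stream().filter(keep::contains).collect(Collectors.toList());
  }

  public static void main(String[] args) {
    List<String> defaultPruned = List.of("seg0/blk0", "seg0/blk1", "seg1/blk0");
    List<String> cgPruned = List.of("seg0/blk1", "seg1/blk0");
    List<String> fgPruned = List.of("seg1/blk0");
    // Each stage only narrows the candidate set.
    System.out.println(intersect(intersect(defaultPruned, cgPruned), fgPruned)); // [seg1/blk0]
  }
}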
use of org.apache.carbondata.core.index.TableIndex in project carbondata by apache.
the class SegmentFileStore method clearBlockIndexCache.
/**
 * After updating the table status file, clear the index cache for all segment ids on which
 * an index is being created. Flows such as merge index file creation modify the segment file,
 * and once the segment file is modified the cache for that segment must be cleared;
 * otherwise the stale cache would be used.
 *
 * @param carbonTable the table whose index cache should be cleared
 * @param segmentId   the segment whose cache entry should be cleared
 */
public static void clearBlockIndexCache(CarbonTable carbonTable, String segmentId) {
  TableIndex defaultIndex = IndexStoreManager.getInstance().getDefaultIndex(carbonTable);
  LOGGER.info("clearing cache while updating segment file entry in table status file for segmentId: "
      + segmentId);
  defaultIndex.getIndexFactory().clear(segmentId);
}
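A hedged usage sketch: after a flow such as merge index file creation has rewritten segment files and the table status file has been updated, the stale cache entries can be dropped per segment. Here 'carbonTable' and 'updatedSegmentIds' are assumed variables for illustration; the only real call is the one shown in the method above.

// Drop the stale index cache entry for every segment whose segment file changed.
for (String updatedSegmentId : updatedSegmentIds) {
  SegmentFileStore.clearBlockIndexCache(carbonTable, updatedSegmentId);
}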
use of org.apache.carbondata.core.index.TableIndex in project carbondata by apache.
the class IndexWriterListener method registerAllWriter.
/**
 * Register all index writers for the specified table and segment.
 */
public void registerAllWriter(CarbonTable carbonTable, String segmentId, String taskNo,
    SegmentProperties segmentProperties) {
  // Clear the cache on the executor side.
  IndexStoreManager.getInstance().clearIndex(carbonTable.getTableId());
  List<TableIndex> tableIndices;
  try {
    tableIndices = IndexStoreManager.getInstance().getAllCGAndFGIndexes(carbonTable);
  } catch (IOException e) {
    LOG.error("Error while retrieving indexes", e);
    throw new RuntimeException(e);
  }
  tblIdentifier = carbonTable.getCarbonTableIdentifier();
  for (TableIndex tableIndex : tableIndices) {
    // Skip lazy indexes (the user will rebuild those manually) and secondary indexes.
    if (!tableIndex.getIndexSchema().isLazy() && !tableIndex.getIndexSchema().getProviderName()
        .equals(IndexType.SI.getIndexProviderName())) {
      IndexFactory factory = tableIndex.getIndexFactory();
      register(factory, segmentId, taskNo, segmentProperties);
    }
  }
}
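The registration rule above can be read as a single predicate. The following sketch extracts exactly that condition into a hypothetical helper (not part of IndexWriterListener), assuming the same imports as the listing above.

// An index gets a writer only if it is not lazy (lazy indexes are rebuilt
// manually by the user) and is not a secondary index (SI has its own flow).
static boolean needsWriter(TableIndex tableIndex) {
  return !tableIndex.getIndexSchema().isLazy()
      && !tableIndex.getIndexSchema().getProviderName()
          .equals(IndexType.SI.getIndexProviderName());
}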
use of org.apache.carbondata.core.index.TableIndex in project carbondata by apache.
the class CarbonTableInputFormat method getBlockRowCount.
/**
 * Get the row count of each block, and the mapping of segment to block count.
 */
public BlockMappingVO getBlockRowCount(Job job, CarbonTable table,
    List<PartitionSpec> partitions, boolean isUpdateFlow) throws IOException {
  // The normal query flow goes through CarbonInputFormat#getPrunedBlocklets and initializes
  // the pruning info for the queried table. A count(*) query without a filter uses a
  // different query plan, so no pruning info is initialized, and when the default index
  // prunes (with a null filter) an exception would occur while setting the pruning info.
  // Since there is no useful block/blocklet pruning information for such a query
  // (actually no pruning happens), the explain collector is disabled here.
  ExplainCollector.remove();
  AbsoluteTableIdentifier identifier = table.getAbsoluteTableIdentifier();
  ReadCommittedScope readCommittedScope = getReadCommitted(job, identifier);
  LoadMetadataDetails[] loadMetadataDetails = readCommittedScope.getSegmentList();
  SegmentUpdateStatusManager updateStatusManager =
      new SegmentUpdateStatusManager(table, loadMetadataDetails);
  SegmentStatusManager.ValidAndInvalidSegmentsInfo allSegments =
      new SegmentStatusManager(identifier, readCommittedScope.getConfiguration())
          .getValidAndInvalidSegments(table.isMV(), loadMetadataDetails, readCommittedScope);
  Map<String, Long> blockRowCountMapping = new HashMap<>();
  Map<String, Long> segmentAndBlockCountMapping = new HashMap<>();
  Map<String, String> blockToSegmentMapping = new HashMap<>();
  // TODO: currently only batch segments are supported; add support for streaming tables.
  List<Segment> filteredSegment =
      getFilteredSegment(job, allSegments.getValidSegments(), false, readCommittedScope);
  boolean isIUDTable = (updateStatusManager.getUpdateStatusDetails().length != 0);
  /* In the select * flow, getSplits() clears the segmentMap if a segment needs refreshing;
     the same is needed for the select count(*) flow. For a NonTransactional table, one
     reason for a segment refresh is the following scenario: the SDK writes one set of
     files with a UUID and can write again with the same UUID, so the latest file content
     should be reflected in the new count by refreshing the segment. */
  List<String> toBeCleanedSegments = new ArrayList<>();
  for (Segment segment : filteredSegment) {
    boolean refreshNeeded = IndexStoreManager.getInstance()
        .getTableSegmentRefresher(getOrCreateCarbonTable(job.getConfiguration()))
        .isRefreshNeeded(segment,
            SegmentUpdateStatusManager.getInvalidTimestampRange(segment.getLoadMetadataDetails()));
    if (refreshNeeded) {
      toBeCleanedSegments.add(segment.getSegmentNo());
    }
  }
  for (Segment segment : allSegments.getInvalidSegments()) {
    // Remove the entries of invalid segments from the segment index.
    toBeCleanedSegments.add(segment.getSegmentNo());
  }
  if (toBeCleanedSegments.size() > 0) {
    IndexStoreManager.getInstance()
        .clearInvalidSegments(getOrCreateCarbonTable(job.getConfiguration()), toBeCleanedSegments);
  }
  IndexExprWrapper indexExprWrapper =
      IndexChooser.getDefaultIndex(getOrCreateCarbonTable(job.getConfiguration()), null);
  IndexUtil.loadIndexes(table, indexExprWrapper, filteredSegment);
  if (isIUDTable || isUpdateFlow) {
    Map<String, Long> blockletToRowCountMap = new HashMap<>();
    if (CarbonProperties.getInstance()
        .isDistributedPruningEnabled(table.getDatabaseName(), table.getTableName())) {
      try {
        List<ExtendedBlocklet> extendedBlocklets = getDistributedBlockRowCount(table,
            partitions, filteredSegment, allSegments.getInvalidSegments(),
            toBeCleanedSegments, job.getConfiguration());
        for (ExtendedBlocklet blocklet : extendedBlocklets) {
          String filePath = blocklet.getFilePath().replace("\\", "/");
          String blockName = filePath.substring(filePath.lastIndexOf("/") + 1);
          blockletToRowCountMap.put(blocklet.getSegmentId() + "," + blockName,
              blocklet.getRowCount());
        }
      } catch (Exception e) {
        // If fallback is disabled, rethrow; otherwise fall back to driver-side pruning.
        if (CarbonProperties.getInstance().isFallBackDisabled()) {
          throw e;
        }
        TableIndex defaultIndex = IndexStoreManager.getInstance().getDefaultIndex(table);
        blockletToRowCountMap.putAll(
            defaultIndex.getBlockRowCount(filteredSegment, partitions, defaultIndex));
      }
    } else {
      TableIndex defaultIndex = IndexStoreManager.getInstance().getDefaultIndex(table);
      blockletToRowCountMap.putAll(
          defaultIndex.getBlockRowCount(filteredSegment, partitions, defaultIndex));
    }
    // The key is (segmentId + "," + blockName) and the value is the row count of that blocklet.
    for (Map.Entry<String, Long> eachBlocklet : blockletToRowCountMap.entrySet()) {
      String[] segmentIdAndPath = eachBlocklet.getKey().split(",", 2);
      String segmentId = segmentIdAndPath[0];
      String blockName = segmentIdAndPath[1];
      long rowCount = eachBlocklet.getValue();
      String key = CarbonUpdateUtil.getSegmentBlockNameKey(segmentId, blockName,
          table.isHivePartitionTable());
      // If the block is invalid, do not add its count.
      SegmentUpdateDetails details = updateStatusManager.getDetailsForABlock(key);
      if (null == details || !CarbonUpdateUtil.isBlockInvalid(details.getSegmentStatus())) {
        Long blockCount = blockRowCountMapping.get(key);
        if (blockCount == null) {
          blockCount = 0L;
          Long count = segmentAndBlockCountMapping.get(segmentId);
          if (count == null) {
            count = 0L;
          }
          segmentAndBlockCountMapping.put(segmentId, count + 1);
        }
        blockToSegmentMapping.put(key, segmentId);
        blockCount += rowCount;
        blockRowCountMapping.put(key, blockCount);
      }
    }
  } else {
    long totalRowCount;
    if (CarbonProperties.getInstance()
        .isDistributedPruningEnabled(table.getDatabaseName(), table.getTableName())) {
      totalRowCount = getDistributedCount(table, partitions, filteredSegment, job.getConfiguration());
    } else {
      TableIndex defaultIndex = IndexStoreManager.getInstance().getDefaultIndex(table);
      totalRowCount = defaultIndex.getRowCount(filteredSegment, partitions, defaultIndex);
    }
    blockRowCountMapping.put(CarbonCommonConstantsInternal.ROW_COUNT, totalRowCount);
  }
  BlockMappingVO blockMappingVO =
      new BlockMappingVO(blockRowCountMapping, segmentAndBlockCountMapping);
  blockMappingVO.setBlockToSegmentMapping(blockToSegmentMapping);
  return blockMappingVO;
}
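The keys of blockletToRowCountMap follow the "segmentId,blockName" layout built in the method above. A minimal, runnable sketch of splitting them back apart; the sample map entry is made up for illustration.

import java.util.Map;

public class BlockRowCountKeySketch {
  public static void main(String[] args) {
    // Sample entry using the "<segmentId>,<blockName>" key layout from getBlockRowCount.
    Map<String, Long> blockletToRowCountMap =
        Map.of("2,part-0-0_batchno0-0-0.carbondata", 1000L);
    for (Map.Entry<String, Long> e : blockletToRowCountMap.entrySet()) {
      // The limit of 2 mirrors the split in the method above: only the first
      // comma separates the segment id from the block name.
      String[] segmentIdAndPath = e.getKey().split(",", 2);
      System.out.printf("segment=%s block=%s rows=%d%n",
          segmentIdAndPath[0], segmentIdAndPath[1], e.getValue());
    }
  }
}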
use of org.apache.carbondata.core.index.TableIndex in project carbondata by apache.
the class CarbonTable method getAllVisibleIndexes.
/**
 * Returns only the visible indexes of this table.
 */
public List<TableIndex> getAllVisibleIndexes() throws IOException {
  CarbonSessionInfo sessionInfo = ThreadLocalSessionInfo.getCarbonSessionInfo();
  List<TableIndex> allIndexes = IndexStoreManager.getInstance().getAllCGAndFGIndexes(this);
  Iterator<TableIndex> indexIterator = allIndexes.iterator();
  while (indexIterator.hasNext()) {
    TableIndex index = indexIterator.next();
    String dbName = this.getDatabaseName();
    String tableName = this.getTableName();
    String indexName = index.getIndexSchema().getIndexName();
    // TODO: support getting the visible status of an index without sessionInfo in the future.
    if (sessionInfo != null) {
      boolean isIndexVisible = sessionInfo.getSessionParams().getProperty(
          String.format("%s%s.%s.%s", CarbonCommonConstants.CARBON_INDEX_VISIBLE,
              dbName, tableName, indexName), "true").trim().equalsIgnoreCase("true");
      if (!isIndexVisible) {
        LOGGER.warn(String.format("Ignore invisible index %s on table %s.%s",
            indexName, dbName, tableName));
        indexIterator.remove();
      }
    } else {
      String message = "Carbon session info is null";
      LOGGER.info(message);
    }
  }
  return allIndexes;
}
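The visibility check reads a session property whose key layout is visible in the method above. The sketch below only constructs such a key; the prefix value is a guess (the snippet above does not show the constant's value), and the database, table, and index names are made up.

public class IndexVisibilityKeySketch {
  // Assumed to mirror CarbonCommonConstants.CARBON_INDEX_VISIBLE; the real
  // constant's value is not shown above, so this is an illustrative guess.
  static final String CARBON_INDEX_VISIBLE = "carbon.index.visible.";

  public static void main(String[] args) {
    // Key layout copied from getAllVisibleIndexes: <prefix><db>.<table>.<index>.
    String key = String.format("%s%s.%s.%s",
        CARBON_INDEX_VISIBLE, "default", "sales", "idx_bloom");
    // Setting this session property to "false" would make the index invisible
    // to getAllVisibleIndexes; "true" (the default) keeps it visible.
    System.out.println(key);
  }
}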