use of org.apache.carbondata.core.indexstore.Blocklet in project carbondata by apache.
the class TableIndex method pruneMultiThread.
/**
 * Prunes the indexes of the given segments with several threads and appends the surviving
 * blocklets to {@code blocklets}.
 *
 * <p>The indexes are first split into contiguous groups of roughly
 * {@code totalFiles / numOfThreadsForPruning} entries each (see the worked example inside the
 * method). One task per group is submitted to a fixed thread pool; every task stores its result
 * per segment, and the per-segment results are merged once all tasks have been checked.
 *
 * <p>Segments that have no indexes in {@code indexes} are skipped.
 *
 * @param segments   segments to prune
 * @param filter     filter used for pruning
 * @param blocklets  list that receives the pruned blocklets; this same list is returned
 * @param indexes    indexes of every segment, keyed by segment
 * @param totalFiles sum of {@code getNumberOfEntries()} over all indexes; used to size the groups
 *                   and to verify that the grouping covered every entry
 * @return {@code blocklets} with the pruned blocklets of all segments appended
 * @throws RuntimeException if the grouping did not account for {@code totalFiles} entries, if a
 *                          pruning task failed, or if the calling thread was interrupted while a
 *                          task was still running
 */
private List<ExtendedBlocklet> pruneMultiThread(List<Segment> segments, final IndexFilter filter,
    List<ExtendedBlocklet> blocklets, final Map<Segment, List<Index>> indexes, int totalFiles) {
  /*
   *********************************************************************************
   * Below is an example of how this part of the code works.
   * Consider a scenario with 5 segments, 10 indexes in each segment,
   * and one record in each index. So 50 records in total.
   *
   * The indexes in each segment look like below.
   * s0 [0-9], s1 [0-9], s2 [0-9], s3[0-9], s4[0-9]
   *
   * If the number of threads is 4, filesPerEachThread = 50/4 = 12 files per thread.
   *
   * A SegmentIndexGroup looks like below: [SegmentId, fromIndex, toIndex]
   * In each segment only the indexes between fromIndex and toIndex are processed.
   *
   * The final result will be: (4 lists are created as numOfThreadsForPruning is 4)
   * Thread1 list: s0 [0-9], s1 [0-1] : 12 files
   * Thread2 list: s1 [2-9], s2 [0-3] : 12 files
   * Thread3 list: s2 [4-9], s3 [0-5] : 12 files
   * Thread4 list: s3 [6-9], s4 [0-9] : 14 files
   * so each thread will process an almost equal number of records.
   *
   *********************************************************************************
   */
  int numOfThreadsForPruning = CarbonProperties.getNumOfThreadsForPruning();
  int filesPerEachThread = totalFiles / numOfThreadsForPruning;
  int prev;
  int filesCount = 0;
  int processedFileCount = 0;
  List<List<SegmentIndexGroup>> indexListForEachThread = new ArrayList<>(numOfThreadsForPruning);
  List<SegmentIndexGroup> segmentIndexGroupList = new ArrayList<>();
  final Set<String> missingSISegments = filter.getMissingSISegments();
  for (Segment segment : segments) {
    List<Index> eachSegmentIndexList = indexes.get(segment);
    if (eachSegmentIndexList == null || eachSegmentIndexList.isEmpty()) {
      // Nothing to prune for this segment. Without this guard an empty list would produce the
      // group [segment, 0, -1] and the worker would fail on indexList.get(0).
      continue;
    }
    prev = 0;
    for (int i = 0; i < eachSegmentIndexList.size(); i++) {
      Index index = eachSegmentIndexList.get(i);
      filesCount += index.getNumberOfEntries();
      if (filesCount >= filesPerEachThread) {
        if (indexListForEachThread.size() != numOfThreadsForPruning - 1) {
          // not the last segmentList
          segmentIndexGroupList.add(new SegmentIndexGroup(segment, prev, i));
          // save the last value to process in next thread
          prev = i + 1;
          indexListForEachThread.add(segmentIndexGroupList);
          segmentIndexGroupList = new ArrayList<>();
          processedFileCount += filesCount;
          filesCount = 0;
        } else {
          // last list: keep collecting, everything that remains goes to the last thread
          processedFileCount += filesCount;
          filesCount = 0;
        }
      }
    }
    if (prev == 0 || prev != eachSegmentIndexList.size()) {
      // prev == 0: add all indexes of the segment
      // prev != eachSegmentIndexList.size(): add the remaining indexes of this segment
      segmentIndexGroupList.add(
          new SegmentIndexGroup(segment, prev, eachSegmentIndexList.size() - 1));
    }
  }
  // adding the last segmentList data
  indexListForEachThread.add(segmentIndexGroupList);
  processedFileCount += filesCount;
  if (processedFileCount != totalFiles) {
    // this should not happen
    throw new RuntimeException(" not all the files processed ");
  }
  if (indexListForEachThread.size() < numOfThreadsForPruning) {
    // The indexes fitted into fewer groups than numOfThreadsForPruning,
    // so launch only as many threads as there are groups.
    LOG.info("indexes is distributed in " + indexListForEachThread.size() + " threads");
    numOfThreadsForPruning = indexListForEachThread.size();
  }
  LOG.info("Number of threads selected for multi-thread block pruning is "
      + numOfThreadsForPruning + ". total files: " + totalFiles + ". total segments: "
      + segments.size());
  List<Future<Void>> results = new ArrayList<>(numOfThreadsForPruning);
  final Map<Segment, List<ExtendedBlocklet>> prunedBlockletMap =
      new ConcurrentHashMap<>(segments.size());
  final ExecutorService executorService = Executors.newFixedThreadPool(numOfThreadsForPruning);
  final String threadName = Thread.currentThread().getName();
  for (int i = 0; i < numOfThreadsForPruning; i++) {
    final List<SegmentIndexGroup> segmentIndexGroups = indexListForEachThread.get(i);
    results.add(executorService.submit(new Callable<Void>() {
      @Override
      public Void call() throws IOException {
        // workers run under the name of the submitting thread
        Thread.currentThread().setName(threadName);
        for (SegmentIndexGroup segmentIndexGroup : segmentIndexGroups) {
          List<ExtendedBlocklet> pruneBlocklets = new ArrayList<>();
          List<Index> indexList = indexes.get(segmentIndexGroup.getSegment());
          SegmentProperties segmentProperties =
              segmentPropertiesFetcher.getSegmentPropertiesFromIndex(indexList.get(0));
          Segment segment = segmentIndexGroup.getSegment();
          boolean isExternalOrMissingSISegment = segment.getSegmentPath() != null
              || (missingSISegments != null
              && missingSISegments.contains(segment.getSegmentNo()));
          if (filter.isResolvedOnSegment(segmentProperties)) {
            // the resolver of the filter can be used directly for this segment
            FilterExecutor filterExecutor;
            if (!isExternalOrMissingSISegment) {
              filterExecutor = FilterUtil.getFilterExecutorTree(filter.getResolver(),
                  segmentProperties, null, table.getMinMaxCacheColumns(segmentProperties),
                  false);
            } else {
              filterExecutor = FilterUtil.getFilterExecutorTree(
                  filter.getExternalSegmentResolver(), segmentProperties, null,
                  table.getMinMaxCacheColumns(segmentProperties), false);
            }
            for (int idx = segmentIndexGroup.getFromIndex();
                 idx <= segmentIndexGroup.getToIndex(); idx++) {
              List<Blocklet> dmPruneBlocklets;
              if (!isExternalOrMissingSISegment) {
                dmPruneBlocklets = indexList.get(idx)
                    .prune(filter.getResolver(), segmentProperties, filterExecutor, table);
              } else {
                dmPruneBlocklets = indexList.get(idx)
                    .prune(filter.getExternalSegmentResolver(), segmentProperties,
                        filterExecutor, table);
              }
              pruneBlocklets.addAll(addSegmentId(
                  blockletDetailsFetcher.getExtendedBlocklets(dmPruneBlocklets, segment),
                  segment));
            }
          } else {
            // build a filter for this segment's properties from a copy of the expression
            Expression filterExpression = filter.getNewCopyOfExpression();
            FilterExecutor filterExecutor;
            if (!isExternalOrMissingSISegment) {
              filterExecutor = FilterUtil.getFilterExecutorTree(
                  new IndexFilter(segmentProperties, table, filterExpression).getResolver(),
                  segmentProperties, null, table.getMinMaxCacheColumns(segmentProperties),
                  false);
            } else {
              filterExecutor = FilterUtil.getFilterExecutorTree(
                  new IndexFilter(segmentProperties, table, filterExpression)
                      .getExternalSegmentResolver(),
                  segmentProperties, null, table.getMinMaxCacheColumns(segmentProperties),
                  false);
            }
            for (int idx = segmentIndexGroup.getFromIndex();
                 idx <= segmentIndexGroup.getToIndex(); idx++) {
              List<Blocklet> dmPruneBlocklets;
              if (!isExternalOrMissingSISegment) {
                dmPruneBlocklets = indexList.get(idx)
                    .prune(filterExpression, segmentProperties, table, filterExecutor);
              } else {
                dmPruneBlocklets = indexList.get(idx)
                    .prune(filter.getExternalSegmentFilter(), segmentProperties, table,
                        filterExecutor);
              }
              pruneBlocklets.addAll(addSegmentId(
                  blockletDetailsFetcher.getExtendedBlocklets(dmPruneBlocklets, segment),
                  segment));
            }
          }
          // a segment can be split over several groups, so merge under a lock
          synchronized (prunedBlockletMap) {
            List<ExtendedBlocklet> pruneBlockletsExisting =
                prunedBlockletMap.get(segmentIndexGroup.getSegment());
            if (pruneBlockletsExisting != null) {
              pruneBlockletsExisting.addAll(pruneBlocklets);
            } else {
              prunedBlockletMap.put(segmentIndexGroup.getSegment(), pruneBlocklets);
            }
          }
        }
        return null;
      }
    }));
  }
  executorService.shutdown();
  try {
    executorService.awaitTermination(2, TimeUnit.HOURS);
  } catch (InterruptedException e) {
    LOG.error("Error in pruning index in multi-thread: " + e.getMessage(), e);
    // Restore the interrupt status instead of swallowing it. Future.get() below then fails
    // fast for any task that is still running rather than silently ignoring the interrupt.
    Thread.currentThread().interrupt();
  }
  // check for error
  for (Future<Void> result : results) {
    try {
      result.get();
    } catch (InterruptedException e) {
      // Future.get() clears the flag when it throws; set it again for the caller
      Thread.currentThread().interrupt();
      throw new RuntimeException(e);
    } catch (ExecutionException e) {
      throw new RuntimeException(e);
    }
  }
  for (Map.Entry<Segment, List<ExtendedBlocklet>> entry : prunedBlockletMap.entrySet()) {
    blocklets.addAll(entry.getValue());
  }
  return blocklets;
}
use of org.apache.carbondata.core.indexstore.Blocklet in project carbondata by apache.
the class TableIndex method prune.
/**
 * Prunes the given indexes for the segment of one distributable and returns the matching
 * blocklets together with their detail information. This method is meant to be called on
 * whichever machine receives the distributable.
 *
 * <p>When the index level of the factory is {@code IndexLevel.FG}, every blocklet is also
 * serialized to its own path below the table path, inside a directory named after the index,
 * and that path is set on the returned blocklet.
 *
 * @param indices       indexes to prune
 * @param distributable split whose segment is pruned
 * @param filterExp     resolved filter used for pruning
 * @param partitions    partitions whose locations are passed on when fetching the segment
 *                      properties
 * @return the pruned blocklets with details, each with its segment set
 * @throws IOException if one of the underlying calls fails with an I/O error
 */
public List<ExtendedBlocklet> prune(List<Index> indices, IndexInputSplit distributable,
    FilterResolverIntf filterExp, List<PartitionSpec> partitions) throws IOException {
  List<ExtendedBlocklet> result = new ArrayList<>();
  List<Blocklet> pruned = new ArrayList<>();

  // Resolve what is needed to evaluate the filter against this segment.
  Set<Path> partitionLocations = getPartitionLocations(partitions);
  SegmentProperties properties =
      segmentPropertiesFetcher.getSegmentProperties(distributable.getSegment(),
          partitionLocations);
  FilterExecutor executor = FilterUtil.getFilterExecutorTree(filterExp, properties, null,
      table.getMinMaxCacheColumns(properties), false);

  // Collect the blocklets that pass the filter from every index.
  for (Index candidate : indices) {
    List<Blocklet> hits = candidate.prune(filterExp, properties, executor, table);
    pruned.addAll(hits);
  }

  BlockletSerializer blockletSerializer = new BlockletSerializer();
  String indexDirectory = identifier.getTablePath() + CarbonCommonConstants.FILE_SEPARATOR
      + indexSchema.getIndexName();
  if (indexFactory.getIndexLevel() == IndexLevel.FG) {
    // fine-grained blocklets are written below this directory
    FileFactory.mkdirs(indexDirectory);
  }

  for (Blocklet hit : pruned) {
    ExtendedBlocklet extended =
        blockletDetailsFetcher.getExtendedBlocklet(hit, distributable.getSegment());
    if (indexFactory.getIndexLevel() == IndexLevel.FG) {
      // the nano time serves as the file name of the serialized blocklet
      String target = indexDirectory + CarbonCommonConstants.FILE_SEPARATOR + System.nanoTime();
      extended.setIndexWriterPath(target);
      blockletSerializer.serializeBlocklet((FineGrainBlocklet) hit, target);
    }
    extended.setSegment(distributable.getSegment());
    result.add(extended);
  }
  return result;
}
use of org.apache.carbondata.core.indexstore.Blocklet in project carbondata by apache.
the class TableIndex method pruneWithFilter.
/**
 * Prunes the segments one after another on the calling thread and appends the surviving
 * blocklets to {@code blocklets}.
 *
 * <p>Segments that are {@code null} or have no indexes in {@code indexes} are skipped. For an
 * external segment, or one listed in the filter's missing SI segments, the external-segment
 * resolver / filter of {@code filter} is used instead of the regular one.
 *
 * @param segments           segments to prune
 * @param filter             filter used for pruning
 * @param partitionLocations partition locations passed on when the segment properties cannot be
 *                           taken from the first index
 * @param blocklets          list that receives the pruned blocklets; this same list is returned
 * @param indexes            indexes of every segment, keyed by segment
 * @return {@code blocklets} with the pruned blocklets of all segments appended
 * @throws IOException if one of the underlying calls fails with an I/O error
 */
private List<ExtendedBlocklet> pruneWithFilter(List<Segment> segments, IndexFilter filter,
    Set<Path> partitionLocations, List<ExtendedBlocklet> blocklets,
    Map<Segment, List<Index>> indexes) throws IOException {
  Set<String> siMissingSegments = filter.getMissingSISegments();
  for (Segment segment : segments) {
    List<Index> indexList = indexes.get(segment);
    boolean nothingToPrune = segment == null || indexList == null || indexList.isEmpty();
    if (nothingToPrune) {
      continue;
    }
    boolean useExternalFilter = segment.isExternalSegment()
        || (siMissingSegments != null && siMissingSegments.contains(segment.getSegmentNo()));
    List<Blocklet> pruned = new ArrayList<>();

    // A BlockIndex can supply the segment properties itself; otherwise look them up.
    SegmentProperties properties = indexList.get(0) instanceof BlockIndex
        ? segmentPropertiesFetcher.getSegmentPropertiesFromIndex(indexList.get(0))
        : segmentPropertiesFetcher.getSegmentProperties(segment, partitionLocations);

    if (!filter.isResolvedOnSegment(properties)) {
      // Build a filter for this segment's properties from the expression.
      Expression expression = filter.getExpression();
      IndexFilter segmentFilter = new IndexFilter(properties, table, expression);
      FilterExecutor executor = FilterUtil.getFilterExecutorTree(
          useExternalFilter ? segmentFilter.getExternalSegmentResolver()
              : segmentFilter.getResolver(),
          properties, null, table.getMinMaxCacheColumns(properties), false);
      for (Index index : indexList) {
        Expression pruneExpression =
            useExternalFilter ? filter.getExternalSegmentFilter() : filter.getExpression();
        pruned.addAll(index.prune(pruneExpression, properties, table, executor));
      }
    } else {
      // The resolver of the filter can be used directly for this segment.
      FilterExecutor executor = FilterUtil.getFilterExecutorTree(
          useExternalFilter ? filter.getExternalSegmentResolver() : filter.getResolver(),
          properties, null, table.getMinMaxCacheColumns(properties), false);
      for (Index index : indexList) {
        FilterResolverIntf resolver =
            useExternalFilter ? filter.getExternalSegmentResolver() : filter.getResolver();
        pruned.addAll(index.prune(resolver, properties, executor, this.table));
      }
    }

    List<ExtendedBlocklet> detailed =
        blockletDetailsFetcher.getExtendedBlocklets(pruned, segment);
    blocklets.addAll(addSegmentId(detailed, segment));
  }
  return blocklets;
}
use of org.apache.carbondata.core.indexstore.Blocklet in project carbondata by apache.
the class BlockletIndexFactory method getExtendedBlocklets.
/**
 * Returns the detail information for the given blocklets of a segment. Per the original note
 * this lookup is exclusive to BlockletIndexFactory, because the detail information is only
 * available in this default index.
 *
 * <p>An empty input yields an empty list. If the first element is already an
 * {@link ExtendedBlocklet}, all elements are cast and returned as they are. Otherwise each
 * blocklet is looked up through the table block index identifiers of the segment.
 *
 * @param blocklets blocklets to resolve
 * @param segment   segment the blocklets belong to
 * @return a new list with one detailed blocklet per input blocklet, in input order
 * @throws IOException if one of the underlying calls fails with an I/O error
 */
@Override
public List<ExtendedBlocklet> getExtendedBlocklets(List<Blocklet> blocklets, Segment segment)
    throws IOException {
  List<ExtendedBlocklet> extended = new ArrayList<>(blocklets.size() + 1);
  if (blocklets.isEmpty()) {
    // nothing to resolve
    return extended;
  }

  if (blocklets.get(0) instanceof ExtendedBlocklet) {
    // already detailed: only the first element is inspected, the rest is cast
    for (Blocklet alreadyDetailed : blocklets) {
      extended.add((ExtendedBlocklet) alreadyDetailed);
    }
    return extended;
  }

  // Wrap every index identifier of the segment together with the carbon table.
  Set<TableBlockIndexUniqueIdentifier> segmentIdentifiers =
      getTableBlockIndexUniqueIdentifiers(segment);
  Set<TableBlockIndexUniqueIdentifierWrapper> wrappers =
      new HashSet<>(segmentIdentifiers.size());
  for (TableBlockIndexUniqueIdentifier id : segmentIdentifiers) {
    wrappers.add(new TableBlockIndexUniqueIdentifierWrapper(id, this.getCarbonTable()));
  }

  // Fetch the detail information of each blocklet from the blocklet index.
  for (Blocklet plain : blocklets) {
    extended.add(getExtendedBlocklet(wrappers, plain));
  }
  return extended;
}
use of org.apache.carbondata.core.indexstore.Blocklet in project carbondata by apache.
the class BlockIndex method prune.
/**
 * Prunes the rows of this index with the given filter.
 *
 * <p>Without a filter every row becomes a blocklet. With a filter, a row is kept only when
 * {@code addBlockBasedOnMinMaxValue} accepts its min/max values. If
 * {@code validateSegmentProperties} rejects the passed segment properties, the filter executor
 * is rebuilt from this index's own segment properties. When the explain collector is enabled,
 * the total and hit counts are reported to it.
 *
 * @param filterExp         resolved filter, or {@code null} to select every row
 * @param filterExecutor    executor used for the min/max check; may be replaced (see above)
 * @param segmentProperties segment properties the executor was built for
 * @return the blocklets that survived pruning; empty if the store has no rows
 */
private List<Blocklet> prune(FilterResolverIntf filterExp, FilterExecutor filterExecutor,
    SegmentProperties segmentProperties) {
  if (memoryDMStore.getRowCount() == 0) {
    return new ArrayList<>();
  }
  List<Blocklet> selected = new ArrayList<>();
  CarbonRowSchema[] rowSchema = getFileFooterEntrySchema();
  String basePath = getFilePath();
  int rowCount = memoryDMStore.getRowCount();
  // the totals are only needed for the explain output
  int total = 0;
  if (ExplainCollector.enabled()) {
    total = getTotalBlocklets();
  }
  int hits = 0;

  if (filterExp != null) {
    // B-tree jump logic is not used here, as the prepared start and end keys are not
    // correct for old store scenarios; every row is checked instead.
    // Flag deciding whether min/max is used in executor pruning for BlockletIndex.
    boolean minMaxForExecutor = useMinMaxForExecutorPruning(filterExp);
    if (!validateSegmentProperties(segmentProperties)) {
      filterExecutor = FilterUtil.getFilterExecutorTree(filterExp, getSegmentProperties(), null,
          getMinMaxCacheColumns(), false);
    }
    for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) {
      IndexRow current = memoryDMStore.getIndexRow(rowSchema, rowIndex);
      boolean[] flags = getMinMaxFlag(current, BLOCK_MIN_MAX_FLAG);
      String blockFile = getFileNameWithFilePath(current, basePath);
      short id = getBlockletId(current);
      boolean keep = addBlockBasedOnMinMaxValue(filterExecutor,
          getMinMaxValue(current, MAX_VALUES_INDEX), getMinMaxValue(current, MIN_VALUES_INDEX),
          flags, blockFile, id);
      if (!keep) {
        continue;
      }
      selected.add(createBlocklet(current, blockFile, id, minMaxForExecutor));
      if (ExplainCollector.enabled()) {
        hits += getBlockletNumOfEntry(rowIndex);
      }
    }
  } else {
    // no filter: every row is a hit
    for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) {
      IndexRow current = memoryDMStore.getIndexRow(rowSchema, rowIndex);
      String blockFile = getFileNameWithFilePath(current, basePath);
      short id = getBlockletId(current);
      selected.add(createBlocklet(current, blockFile, id, false));
    }
    hits = total;
  }

  if (ExplainCollector.enabled()) {
    ExplainCollector.setShowPruningInfo(true);
    ExplainCollector.addTotalBlocklets(total);
    ExplainCollector.addTotalBlocks(getTotalBlocks());
    ExplainCollector.addDefaultIndexPruningHit(hits);
  }
  return selected;
}
Aggregations